{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0092236423046697, "eval_steps": 500, "global_step": 122000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.272324936923522e-05, "grad_norm": 80462.0859375, "learning_rate": 8.080808080808081e-09, "loss": 6007.0633, "step": 10 }, { "epoch": 0.00016544649873847045, "grad_norm": 360423.46875, "learning_rate": 1.6161616161616162e-08, "loss": 16793.1375, "step": 20 }, { "epoch": 0.0002481697481077057, "grad_norm": 260210.0, "learning_rate": 2.4242424242424243e-08, "loss": 8807.757, "step": 30 }, { "epoch": 0.0003308929974769409, "grad_norm": 333457.03125, "learning_rate": 3.2323232323232324e-08, "loss": 6780.607, "step": 40 }, { "epoch": 0.0004136162468461761, "grad_norm": 240292.671875, "learning_rate": 4.040404040404041e-08, "loss": 10353.4664, "step": 50 }, { "epoch": 0.0004963394962154114, "grad_norm": 205636.09375, "learning_rate": 4.8484848484848486e-08, "loss": 9239.3687, "step": 60 }, { "epoch": 0.0005790627455846465, "grad_norm": 91692.9375, "learning_rate": 5.656565656565657e-08, "loss": 5793.2141, "step": 70 }, { "epoch": 0.0006617859949538818, "grad_norm": 128958.5859375, "learning_rate": 6.464646464646465e-08, "loss": 7645.5141, "step": 80 }, { "epoch": 0.000744509244323117, "grad_norm": 198792.46875, "learning_rate": 7.272727272727274e-08, "loss": 7540.1, "step": 90 }, { "epoch": 0.0008272324936923522, "grad_norm": 143177.3125, "learning_rate": 8.080808080808082e-08, "loss": 5863.3023, "step": 100 }, { "epoch": 0.0009099557430615875, "grad_norm": 69092.4453125, "learning_rate": 8.88888888888889e-08, "loss": 9028.2383, "step": 110 }, { "epoch": 0.0009926789924308227, "grad_norm": 135931.546875, "learning_rate": 9.696969696969697e-08, "loss": 7231.0602, "step": 120 }, { "epoch": 0.0010754022418000579, "grad_norm": 142479.5625, "learning_rate": 1.0505050505050506e-07, "loss": 4912.673, "step": 130 }, { "epoch": 0.001158125491169293, "grad_norm": 310996.5625, "learning_rate": 1.1313131313131314e-07, "loss": 4637.1719, "step": 140 }, { "epoch": 0.0012408487405385284, "grad_norm": 195854.375, "learning_rate": 1.2121212121212122e-07, "loss": 5191.2895, "step": 150 }, { "epoch": 0.0013235719899077636, "grad_norm": 98090.03125, "learning_rate": 1.292929292929293e-07, "loss": 6609.8664, "step": 160 }, { "epoch": 0.0014062952392769987, "grad_norm": 69154.9765625, "learning_rate": 1.3737373737373738e-07, "loss": 6437.2246, "step": 170 }, { "epoch": 0.001489018488646234, "grad_norm": 161934.78125, "learning_rate": 1.4545454545454548e-07, "loss": 7987.9906, "step": 180 }, { "epoch": 0.0015717417380154692, "grad_norm": 72492.5390625, "learning_rate": 1.5353535353535356e-07, "loss": 5339.8434, "step": 190 }, { "epoch": 0.0016544649873847044, "grad_norm": 28318.041015625, "learning_rate": 1.6161616161616163e-07, "loss": 3778.6113, "step": 200 }, { "epoch": 0.0017371882367539398, "grad_norm": 78588.4375, "learning_rate": 1.6969696969696974e-07, "loss": 5780.0262, "step": 210 }, { "epoch": 0.001819911486123175, "grad_norm": 69777.796875, "learning_rate": 1.777777777777778e-07, "loss": 3939.0852, "step": 220 }, { "epoch": 0.00190263473549241, "grad_norm": 26142.01953125, "learning_rate": 1.858585858585859e-07, "loss": 3253.984, "step": 230 }, { "epoch": 0.0019853579848616455, "grad_norm": 77199.8828125, "learning_rate": 1.9393939393939395e-07, "loss": 3297.4723, "step": 240 }, { "epoch": 0.0020680812342308806, "grad_norm": 79269.9921875, "learning_rate": 2.0202020202020205e-07, "loss": 3044.4252, "step": 250 }, { "epoch": 0.0021508044836001158, "grad_norm": 39357.45703125, "learning_rate": 2.1010101010101013e-07, "loss": 3016.1178, "step": 260 }, { "epoch": 0.002233527732969351, "grad_norm": 37892.91015625, "learning_rate": 2.181818181818182e-07, "loss": 2370.4621, "step": 270 }, { "epoch": 0.002316250982338586, "grad_norm": 40799.3984375, "learning_rate": 2.2626262626262628e-07, "loss": 2173.1178, "step": 280 }, { "epoch": 0.0023989742317078217, "grad_norm": 16129.89453125, "learning_rate": 2.343434343434344e-07, "loss": 2872.4018, "step": 290 }, { "epoch": 0.002481697481077057, "grad_norm": 12510.9287109375, "learning_rate": 2.4242424242424244e-07, "loss": 1385.131, "step": 300 }, { "epoch": 0.002564420730446292, "grad_norm": 21687.78515625, "learning_rate": 2.505050505050505e-07, "loss": 1238.8357, "step": 310 }, { "epoch": 0.002647143979815527, "grad_norm": 26216.89453125, "learning_rate": 2.585858585858586e-07, "loss": 1459.9193, "step": 320 }, { "epoch": 0.0027298672291847623, "grad_norm": 54759.859375, "learning_rate": 2.666666666666667e-07, "loss": 1641.5232, "step": 330 }, { "epoch": 0.0028125904785539974, "grad_norm": 11365.4833984375, "learning_rate": 2.7474747474747475e-07, "loss": 1190.3224, "step": 340 }, { "epoch": 0.002895313727923233, "grad_norm": 16354.1533203125, "learning_rate": 2.828282828282829e-07, "loss": 885.4668, "step": 350 }, { "epoch": 0.002978036977292468, "grad_norm": 11645.587890625, "learning_rate": 2.9090909090909096e-07, "loss": 1400.4568, "step": 360 }, { "epoch": 0.0030607602266617033, "grad_norm": 8614.1328125, "learning_rate": 2.9898989898989904e-07, "loss": 814.0722, "step": 370 }, { "epoch": 0.0031434834760309385, "grad_norm": 11007.8818359375, "learning_rate": 3.070707070707071e-07, "loss": 970.882, "step": 380 }, { "epoch": 0.0032262067254001736, "grad_norm": 6238.4267578125, "learning_rate": 3.151515151515152e-07, "loss": 946.8882, "step": 390 }, { "epoch": 0.003308929974769409, "grad_norm": 5815.009765625, "learning_rate": 3.2323232323232327e-07, "loss": 792.6276, "step": 400 }, { "epoch": 0.003391653224138644, "grad_norm": 34222.5390625, "learning_rate": 3.3131313131313135e-07, "loss": 787.8205, "step": 410 }, { "epoch": 0.0034743764735078795, "grad_norm": 4778.3984375, "learning_rate": 3.393939393939395e-07, "loss": 653.9669, "step": 420 }, { "epoch": 0.0035570997228771147, "grad_norm": 2678.25244140625, "learning_rate": 3.474747474747475e-07, "loss": 793.3054, "step": 430 }, { "epoch": 0.00363982297224635, "grad_norm": 1457.694580078125, "learning_rate": 3.555555555555556e-07, "loss": 630.5825, "step": 440 }, { "epoch": 0.003722546221615585, "grad_norm": 5033.21923828125, "learning_rate": 3.6363636363636366e-07, "loss": 759.3085, "step": 450 }, { "epoch": 0.00380526947098482, "grad_norm": 3704.6376953125, "learning_rate": 3.717171717171718e-07, "loss": 644.2665, "step": 460 }, { "epoch": 0.0038879927203540553, "grad_norm": 5654.28662109375, "learning_rate": 3.7979797979797987e-07, "loss": 710.7244, "step": 470 }, { "epoch": 0.003970715969723291, "grad_norm": 1483.4691162109375, "learning_rate": 3.878787878787879e-07, "loss": 694.1961, "step": 480 }, { "epoch": 0.004053439219092526, "grad_norm": 2019.6363525390625, "learning_rate": 3.9595959595959597e-07, "loss": 573.5805, "step": 490 }, { "epoch": 0.004136162468461761, "grad_norm": 1998.4979248046875, "learning_rate": 4.040404040404041e-07, "loss": 624.7649, "step": 500 }, { "epoch": 0.004218885717830996, "grad_norm": 2387.564453125, "learning_rate": 4.121212121212122e-07, "loss": 626.6972, "step": 510 }, { "epoch": 0.0043016089672002315, "grad_norm": 3881.398681640625, "learning_rate": 4.2020202020202026e-07, "loss": 759.5687, "step": 520 }, { "epoch": 0.004384332216569467, "grad_norm": 1924.41845703125, "learning_rate": 4.282828282828283e-07, "loss": 700.7449, "step": 530 }, { "epoch": 0.004467055465938702, "grad_norm": 2266.10693359375, "learning_rate": 4.363636363636364e-07, "loss": 640.943, "step": 540 }, { "epoch": 0.004549778715307937, "grad_norm": 7472.12548828125, "learning_rate": 4.444444444444445e-07, "loss": 806.1628, "step": 550 }, { "epoch": 0.004632501964677172, "grad_norm": 1539.118408203125, "learning_rate": 4.5252525252525257e-07, "loss": 556.9574, "step": 560 }, { "epoch": 0.004715225214046408, "grad_norm": 1867.2452392578125, "learning_rate": 4.6060606060606064e-07, "loss": 453.8387, "step": 570 }, { "epoch": 0.004797948463415643, "grad_norm": 6622.47119140625, "learning_rate": 4.686868686868688e-07, "loss": 585.511, "step": 580 }, { "epoch": 0.0048806717127848785, "grad_norm": 1117.4276123046875, "learning_rate": 4.767676767676768e-07, "loss": 591.8928, "step": 590 }, { "epoch": 0.004963394962154114, "grad_norm": 1792.2279052734375, "learning_rate": 4.848484848484849e-07, "loss": 659.099, "step": 600 }, { "epoch": 0.005046118211523349, "grad_norm": 952.1702880859375, "learning_rate": 4.929292929292929e-07, "loss": 558.1813, "step": 610 }, { "epoch": 0.005128841460892584, "grad_norm": 1103.0098876953125, "learning_rate": 5.01010101010101e-07, "loss": 488.6359, "step": 620 }, { "epoch": 0.005211564710261819, "grad_norm": 2055.734130859375, "learning_rate": 5.090909090909092e-07, "loss": 611.8516, "step": 630 }, { "epoch": 0.005294287959631054, "grad_norm": 1028.85986328125, "learning_rate": 5.171717171717172e-07, "loss": 568.1, "step": 640 }, { "epoch": 0.005377011209000289, "grad_norm": 1175.14892578125, "learning_rate": 5.252525252525253e-07, "loss": 407.9692, "step": 650 }, { "epoch": 0.005459734458369525, "grad_norm": 1459.8382568359375, "learning_rate": 5.333333333333335e-07, "loss": 496.3047, "step": 660 }, { "epoch": 0.00554245770773876, "grad_norm": 3791.671630859375, "learning_rate": 5.414141414141415e-07, "loss": 492.2279, "step": 670 }, { "epoch": 0.005625180957107995, "grad_norm": 1465.2242431640625, "learning_rate": 5.494949494949495e-07, "loss": 518.7601, "step": 680 }, { "epoch": 0.00570790420647723, "grad_norm": 1057.6175537109375, "learning_rate": 5.575757575757576e-07, "loss": 496.9481, "step": 690 }, { "epoch": 0.005790627455846466, "grad_norm": 1338.5599365234375, "learning_rate": 5.656565656565658e-07, "loss": 493.3083, "step": 700 }, { "epoch": 0.005873350705215701, "grad_norm": 1229.9342041015625, "learning_rate": 5.737373737373738e-07, "loss": 507.9572, "step": 710 }, { "epoch": 0.005956073954584936, "grad_norm": 2031.885498046875, "learning_rate": 5.818181818181819e-07, "loss": 545.9699, "step": 720 }, { "epoch": 0.0060387972039541715, "grad_norm": 1317.18408203125, "learning_rate": 5.898989898989899e-07, "loss": 515.3469, "step": 730 }, { "epoch": 0.006121520453323407, "grad_norm": 1839.4345703125, "learning_rate": 5.979797979797981e-07, "loss": 377.9613, "step": 740 }, { "epoch": 0.006204243702692642, "grad_norm": 1210.0321044921875, "learning_rate": 6.060606060606061e-07, "loss": 465.0116, "step": 750 }, { "epoch": 0.006286966952061877, "grad_norm": 2228.900390625, "learning_rate": 6.141414141414142e-07, "loss": 336.6599, "step": 760 }, { "epoch": 0.006369690201431112, "grad_norm": 1690.2906494140625, "learning_rate": 6.222222222222223e-07, "loss": 402.5558, "step": 770 }, { "epoch": 0.006452413450800347, "grad_norm": 1048.852783203125, "learning_rate": 6.303030303030304e-07, "loss": 460.7069, "step": 780 }, { "epoch": 0.0065351367001695825, "grad_norm": 1623.499755859375, "learning_rate": 6.383838383838384e-07, "loss": 322.7708, "step": 790 }, { "epoch": 0.006617859949538818, "grad_norm": 4279.55908203125, "learning_rate": 6.464646464646465e-07, "loss": 430.4449, "step": 800 }, { "epoch": 0.006700583198908053, "grad_norm": 1473.4871826171875, "learning_rate": 6.545454545454547e-07, "loss": 474.9226, "step": 810 }, { "epoch": 0.006783306448277288, "grad_norm": 1544.477783203125, "learning_rate": 6.626262626262627e-07, "loss": 459.8634, "step": 820 }, { "epoch": 0.006866029697646524, "grad_norm": 991.1500854492188, "learning_rate": 6.707070707070708e-07, "loss": 339.08, "step": 830 }, { "epoch": 0.006948752947015759, "grad_norm": 1124.6473388671875, "learning_rate": 6.78787878787879e-07, "loss": 476.3061, "step": 840 }, { "epoch": 0.007031476196384994, "grad_norm": 1139.6890869140625, "learning_rate": 6.868686868686869e-07, "loss": 504.3425, "step": 850 }, { "epoch": 0.007114199445754229, "grad_norm": 1275.02294921875, "learning_rate": 6.94949494949495e-07, "loss": 431.0188, "step": 860 }, { "epoch": 0.0071969226951234646, "grad_norm": 1159.5965576171875, "learning_rate": 7.03030303030303e-07, "loss": 455.7138, "step": 870 }, { "epoch": 0.0072796459444927, "grad_norm": 1401.516357421875, "learning_rate": 7.111111111111112e-07, "loss": 529.1278, "step": 880 }, { "epoch": 0.007362369193861935, "grad_norm": 1474.188720703125, "learning_rate": 7.191919191919193e-07, "loss": 326.5665, "step": 890 }, { "epoch": 0.00744509244323117, "grad_norm": 917.8528442382812, "learning_rate": 7.272727272727273e-07, "loss": 436.4797, "step": 900 }, { "epoch": 0.007527815692600405, "grad_norm": 941.6436157226562, "learning_rate": 7.353535353535354e-07, "loss": 357.6453, "step": 910 }, { "epoch": 0.00761053894196964, "grad_norm": 934.293701171875, "learning_rate": 7.434343434343436e-07, "loss": 433.9422, "step": 920 }, { "epoch": 0.0076932621913388755, "grad_norm": 1530.59814453125, "learning_rate": 7.515151515151516e-07, "loss": 506.7857, "step": 930 }, { "epoch": 0.007775985440708111, "grad_norm": 3313.063720703125, "learning_rate": 7.595959595959597e-07, "loss": 386.7442, "step": 940 }, { "epoch": 0.007858708690077347, "grad_norm": 9505.775390625, "learning_rate": 7.676767676767677e-07, "loss": 393.8395, "step": 950 }, { "epoch": 0.007941431939446582, "grad_norm": 9777.1015625, "learning_rate": 7.757575757575758e-07, "loss": 444.2824, "step": 960 }, { "epoch": 0.008024155188815817, "grad_norm": 952.7528686523438, "learning_rate": 7.838383838383839e-07, "loss": 463.8062, "step": 970 }, { "epoch": 0.008106878438185052, "grad_norm": 1653.4957275390625, "learning_rate": 7.919191919191919e-07, "loss": 422.011, "step": 980 }, { "epoch": 0.008189601687554287, "grad_norm": 1222.734619140625, "learning_rate": 8.000000000000001e-07, "loss": 370.3142, "step": 990 }, { "epoch": 0.008272324936923522, "grad_norm": 1304.0595703125, "learning_rate": 8.080808080808082e-07, "loss": 423.2224, "step": 1000 }, { "epoch": 0.008355048186292758, "grad_norm": 1451.695556640625, "learning_rate": 8.161616161616162e-07, "loss": 333.3123, "step": 1010 }, { "epoch": 0.008437771435661993, "grad_norm": 1295.0885009765625, "learning_rate": 8.242424242424244e-07, "loss": 395.7991, "step": 1020 }, { "epoch": 0.008520494685031228, "grad_norm": 24490.919921875, "learning_rate": 8.323232323232324e-07, "loss": 484.7744, "step": 1030 }, { "epoch": 0.008603217934400463, "grad_norm": 1630.39306640625, "learning_rate": 8.404040404040405e-07, "loss": 507.0378, "step": 1040 }, { "epoch": 0.008685941183769698, "grad_norm": 1444.76708984375, "learning_rate": 8.484848484848486e-07, "loss": 424.0987, "step": 1050 }, { "epoch": 0.008768664433138933, "grad_norm": 9745.6103515625, "learning_rate": 8.565656565656566e-07, "loss": 356.5501, "step": 1060 }, { "epoch": 0.008851387682508169, "grad_norm": 1603.8682861328125, "learning_rate": 8.646464646464647e-07, "loss": 363.1298, "step": 1070 }, { "epoch": 0.008934110931877404, "grad_norm": 1848.8529052734375, "learning_rate": 8.727272727272728e-07, "loss": 453.0991, "step": 1080 }, { "epoch": 0.009016834181246639, "grad_norm": 1721.5032958984375, "learning_rate": 8.808080808080808e-07, "loss": 352.8759, "step": 1090 }, { "epoch": 0.009099557430615874, "grad_norm": 1215.8017578125, "learning_rate": 8.88888888888889e-07, "loss": 358.2751, "step": 1100 }, { "epoch": 0.00918228067998511, "grad_norm": 3627.594482421875, "learning_rate": 8.96969696969697e-07, "loss": 373.2328, "step": 1110 }, { "epoch": 0.009265003929354344, "grad_norm": 1093.44677734375, "learning_rate": 9.050505050505051e-07, "loss": 305.7944, "step": 1120 }, { "epoch": 0.00934772717872358, "grad_norm": 5584.5224609375, "learning_rate": 9.131313131313133e-07, "loss": 371.0138, "step": 1130 }, { "epoch": 0.009430450428092816, "grad_norm": 1449.30419921875, "learning_rate": 9.212121212121213e-07, "loss": 374.5949, "step": 1140 }, { "epoch": 0.009513173677462052, "grad_norm": 1450.5250244140625, "learning_rate": 9.292929292929294e-07, "loss": 370.9519, "step": 1150 }, { "epoch": 0.009595896926831287, "grad_norm": 1153.1729736328125, "learning_rate": 9.373737373737376e-07, "loss": 311.7418, "step": 1160 }, { "epoch": 0.009678620176200522, "grad_norm": 1182.4305419921875, "learning_rate": 9.454545454545455e-07, "loss": 340.1112, "step": 1170 }, { "epoch": 0.009761343425569757, "grad_norm": 2788.3564453125, "learning_rate": 9.535353535353536e-07, "loss": 402.735, "step": 1180 }, { "epoch": 0.009844066674938992, "grad_norm": 1240.61669921875, "learning_rate": 9.616161616161617e-07, "loss": 430.115, "step": 1190 }, { "epoch": 0.009926789924308227, "grad_norm": 7811.59228515625, "learning_rate": 9.696969696969698e-07, "loss": 294.6839, "step": 1200 }, { "epoch": 0.010009513173677462, "grad_norm": 1490.346923828125, "learning_rate": 9.77777777777778e-07, "loss": 314.4158, "step": 1210 }, { "epoch": 0.010092236423046698, "grad_norm": 1153.1387939453125, "learning_rate": 9.858585858585858e-07, "loss": 422.2901, "step": 1220 }, { "epoch": 0.010174959672415933, "grad_norm": 1359.1773681640625, "learning_rate": 9.93939393939394e-07, "loss": 396.2289, "step": 1230 }, { "epoch": 0.010257682921785168, "grad_norm": 1238.34814453125, "learning_rate": 1.002020202020202e-06, "loss": 432.7112, "step": 1240 }, { "epoch": 0.010340406171154403, "grad_norm": 1150.58154296875, "learning_rate": 1.01010101010101e-06, "loss": 533.3193, "step": 1250 }, { "epoch": 0.010423129420523638, "grad_norm": 2107.6708984375, "learning_rate": 1.0181818181818183e-06, "loss": 386.3309, "step": 1260 }, { "epoch": 0.010505852669892873, "grad_norm": 1610.2342529296875, "learning_rate": 1.0262626262626264e-06, "loss": 378.974, "step": 1270 }, { "epoch": 0.010588575919262109, "grad_norm": 10111.8642578125, "learning_rate": 1.0343434343434344e-06, "loss": 440.8467, "step": 1280 }, { "epoch": 0.010671299168631344, "grad_norm": 3038.983642578125, "learning_rate": 1.0424242424242426e-06, "loss": 531.3744, "step": 1290 }, { "epoch": 0.010754022418000579, "grad_norm": 2078.825927734375, "learning_rate": 1.0505050505050506e-06, "loss": 358.3208, "step": 1300 }, { "epoch": 0.010836745667369814, "grad_norm": 1494.41943359375, "learning_rate": 1.0585858585858587e-06, "loss": 368.8838, "step": 1310 }, { "epoch": 0.01091946891673905, "grad_norm": 913.7642211914062, "learning_rate": 1.066666666666667e-06, "loss": 343.7515, "step": 1320 }, { "epoch": 0.011002192166108284, "grad_norm": 1475.224853515625, "learning_rate": 1.0747474747474747e-06, "loss": 311.1521, "step": 1330 }, { "epoch": 0.01108491541547752, "grad_norm": 6028.0986328125, "learning_rate": 1.082828282828283e-06, "loss": 346.5072, "step": 1340 }, { "epoch": 0.011167638664846755, "grad_norm": 1047.4739990234375, "learning_rate": 1.090909090909091e-06, "loss": 371.3439, "step": 1350 }, { "epoch": 0.01125036191421599, "grad_norm": 2157.152099609375, "learning_rate": 1.098989898989899e-06, "loss": 396.0025, "step": 1360 }, { "epoch": 0.011333085163585225, "grad_norm": 1551.3135986328125, "learning_rate": 1.1070707070707072e-06, "loss": 423.25, "step": 1370 }, { "epoch": 0.01141580841295446, "grad_norm": 1080.8287353515625, "learning_rate": 1.1151515151515153e-06, "loss": 255.2609, "step": 1380 }, { "epoch": 0.011498531662323695, "grad_norm": 835.9887084960938, "learning_rate": 1.1232323232323233e-06, "loss": 284.983, "step": 1390 }, { "epoch": 0.011581254911692932, "grad_norm": 774.7077026367188, "learning_rate": 1.1313131313131315e-06, "loss": 274.2369, "step": 1400 }, { "epoch": 0.011663978161062167, "grad_norm": 1325.56982421875, "learning_rate": 1.1393939393939395e-06, "loss": 797.6222, "step": 1410 }, { "epoch": 0.011746701410431402, "grad_norm": 1981.0302734375, "learning_rate": 1.1474747474747476e-06, "loss": 366.0973, "step": 1420 }, { "epoch": 0.011829424659800638, "grad_norm": 2644.739501953125, "learning_rate": 1.1555555555555556e-06, "loss": 327.9804, "step": 1430 }, { "epoch": 0.011912147909169873, "grad_norm": 1665.543212890625, "learning_rate": 1.1636363636363638e-06, "loss": 425.4298, "step": 1440 }, { "epoch": 0.011994871158539108, "grad_norm": 1862.1712646484375, "learning_rate": 1.1717171717171719e-06, "loss": 324.925, "step": 1450 }, { "epoch": 0.012077594407908343, "grad_norm": 1164.2347412109375, "learning_rate": 1.1797979797979799e-06, "loss": 298.3789, "step": 1460 }, { "epoch": 0.012160317657277578, "grad_norm": 1282.484375, "learning_rate": 1.187878787878788e-06, "loss": 399.5682, "step": 1470 }, { "epoch": 0.012243040906646813, "grad_norm": 1316.5758056640625, "learning_rate": 1.1959595959595961e-06, "loss": 338.0538, "step": 1480 }, { "epoch": 0.012325764156016049, "grad_norm": 875.5802001953125, "learning_rate": 1.2040404040404042e-06, "loss": 308.0409, "step": 1490 }, { "epoch": 0.012408487405385284, "grad_norm": 1360.3880615234375, "learning_rate": 1.2121212121212122e-06, "loss": 292.9588, "step": 1500 }, { "epoch": 0.012491210654754519, "grad_norm": 1129.11083984375, "learning_rate": 1.2202020202020202e-06, "loss": 322.4768, "step": 1510 }, { "epoch": 0.012573933904123754, "grad_norm": 2066.356689453125, "learning_rate": 1.2282828282828285e-06, "loss": 271.5606, "step": 1520 }, { "epoch": 0.012656657153492989, "grad_norm": 1669.0103759765625, "learning_rate": 1.2363636363636365e-06, "loss": 283.8805, "step": 1530 }, { "epoch": 0.012739380402862224, "grad_norm": 2613.5205078125, "learning_rate": 1.2444444444444445e-06, "loss": 376.5511, "step": 1540 }, { "epoch": 0.01282210365223146, "grad_norm": 1188.38427734375, "learning_rate": 1.2525252525252527e-06, "loss": 429.8225, "step": 1550 }, { "epoch": 0.012904826901600695, "grad_norm": 1084.84814453125, "learning_rate": 1.2606060606060608e-06, "loss": 289.73, "step": 1560 }, { "epoch": 0.01298755015096993, "grad_norm": 2206.3232421875, "learning_rate": 1.268686868686869e-06, "loss": 316.7855, "step": 1570 }, { "epoch": 0.013070273400339165, "grad_norm": 2453.128662109375, "learning_rate": 1.2767676767676768e-06, "loss": 304.9484, "step": 1580 }, { "epoch": 0.0131529966497084, "grad_norm": 976.6162719726562, "learning_rate": 1.2848484848484848e-06, "loss": 303.9162, "step": 1590 }, { "epoch": 0.013235719899077635, "grad_norm": 1493.1630859375, "learning_rate": 1.292929292929293e-06, "loss": 356.7861, "step": 1600 }, { "epoch": 0.01331844314844687, "grad_norm": 1568.7215576171875, "learning_rate": 1.301010101010101e-06, "loss": 556.4589, "step": 1610 }, { "epoch": 0.013401166397816106, "grad_norm": 2514.0361328125, "learning_rate": 1.3090909090909093e-06, "loss": 414.0774, "step": 1620 }, { "epoch": 0.01348388964718534, "grad_norm": 1089.7327880859375, "learning_rate": 1.3171717171717172e-06, "loss": 330.1746, "step": 1630 }, { "epoch": 0.013566612896554576, "grad_norm": 1226.1904296875, "learning_rate": 1.3252525252525254e-06, "loss": 408.1962, "step": 1640 }, { "epoch": 0.013649336145923813, "grad_norm": 2240.143310546875, "learning_rate": 1.3333333333333334e-06, "loss": 344.259, "step": 1650 }, { "epoch": 0.013732059395293048, "grad_norm": 1064.2305908203125, "learning_rate": 1.3414141414141417e-06, "loss": 348.617, "step": 1660 }, { "epoch": 0.013814782644662283, "grad_norm": 1232.1429443359375, "learning_rate": 1.3494949494949497e-06, "loss": 406.3739, "step": 1670 }, { "epoch": 0.013897505894031518, "grad_norm": 1123.156982421875, "learning_rate": 1.357575757575758e-06, "loss": 396.9652, "step": 1680 }, { "epoch": 0.013980229143400753, "grad_norm": 1280.202880859375, "learning_rate": 1.3656565656565657e-06, "loss": 391.5166, "step": 1690 }, { "epoch": 0.014062952392769988, "grad_norm": 1420.6954345703125, "learning_rate": 1.3737373737373738e-06, "loss": 437.0295, "step": 1700 }, { "epoch": 0.014145675642139224, "grad_norm": 2150.417236328125, "learning_rate": 1.381818181818182e-06, "loss": 346.5058, "step": 1710 }, { "epoch": 0.014228398891508459, "grad_norm": 1029.7105712890625, "learning_rate": 1.38989898989899e-06, "loss": 326.7657, "step": 1720 }, { "epoch": 0.014311122140877694, "grad_norm": 1207.562744140625, "learning_rate": 1.3979797979797982e-06, "loss": 279.2056, "step": 1730 }, { "epoch": 0.014393845390246929, "grad_norm": 919.50244140625, "learning_rate": 1.406060606060606e-06, "loss": 264.9025, "step": 1740 }, { "epoch": 0.014476568639616164, "grad_norm": 1641.6435546875, "learning_rate": 1.4141414141414143e-06, "loss": 239.4691, "step": 1750 }, { "epoch": 0.0145592918889854, "grad_norm": 1205.822998046875, "learning_rate": 1.4222222222222223e-06, "loss": 333.4085, "step": 1760 }, { "epoch": 0.014642015138354635, "grad_norm": 1508.9305419921875, "learning_rate": 1.4303030303030306e-06, "loss": 300.8089, "step": 1770 }, { "epoch": 0.01472473838772387, "grad_norm": 1087.9306640625, "learning_rate": 1.4383838383838386e-06, "loss": 292.6564, "step": 1780 }, { "epoch": 0.014807461637093105, "grad_norm": 2015.249755859375, "learning_rate": 1.4464646464646464e-06, "loss": 277.2897, "step": 1790 }, { "epoch": 0.01489018488646234, "grad_norm": 4033.002685546875, "learning_rate": 1.4545454545454546e-06, "loss": 336.2396, "step": 1800 }, { "epoch": 0.014972908135831575, "grad_norm": 1528.888916015625, "learning_rate": 1.4626262626262627e-06, "loss": 310.2283, "step": 1810 }, { "epoch": 0.01505563138520081, "grad_norm": 1040.3310546875, "learning_rate": 1.470707070707071e-06, "loss": 296.3051, "step": 1820 }, { "epoch": 0.015138354634570046, "grad_norm": 3019.808837890625, "learning_rate": 1.478787878787879e-06, "loss": 278.5342, "step": 1830 }, { "epoch": 0.01522107788393928, "grad_norm": 1161.161376953125, "learning_rate": 1.4868686868686872e-06, "loss": 357.7166, "step": 1840 }, { "epoch": 0.015303801133308516, "grad_norm": 2254.221435546875, "learning_rate": 1.494949494949495e-06, "loss": 325.2632, "step": 1850 }, { "epoch": 0.015386524382677751, "grad_norm": 3298.8662109375, "learning_rate": 1.5030303030303032e-06, "loss": 340.6655, "step": 1860 }, { "epoch": 0.015469247632046986, "grad_norm": 1297.3563232421875, "learning_rate": 1.5111111111111112e-06, "loss": 327.4712, "step": 1870 }, { "epoch": 0.015551970881416221, "grad_norm": 1163.013671875, "learning_rate": 1.5191919191919195e-06, "loss": 302.4008, "step": 1880 }, { "epoch": 0.015634694130785456, "grad_norm": 3574.491943359375, "learning_rate": 1.5272727272727275e-06, "loss": 304.6065, "step": 1890 }, { "epoch": 0.015717417380154693, "grad_norm": 953.6083984375, "learning_rate": 1.5353535353535353e-06, "loss": 283.16, "step": 1900 }, { "epoch": 0.015800140629523927, "grad_norm": 579.375732421875, "learning_rate": 1.5434343434343435e-06, "loss": 294.0983, "step": 1910 }, { "epoch": 0.015882863878893164, "grad_norm": 904.5838012695312, "learning_rate": 1.5515151515151516e-06, "loss": 277.4686, "step": 1920 }, { "epoch": 0.015965587128262397, "grad_norm": 878.8187255859375, "learning_rate": 1.5595959595959598e-06, "loss": 348.8653, "step": 1930 }, { "epoch": 0.016048310377631634, "grad_norm": 1161.1363525390625, "learning_rate": 1.5676767676767678e-06, "loss": 374.7675, "step": 1940 }, { "epoch": 0.016131033627000867, "grad_norm": 1667.3831787109375, "learning_rate": 1.5757575757575759e-06, "loss": 359.4971, "step": 1950 }, { "epoch": 0.016213756876370104, "grad_norm": 2197.3330078125, "learning_rate": 1.5838383838383839e-06, "loss": 367.9104, "step": 1960 }, { "epoch": 0.016296480125739338, "grad_norm": 1425.8123779296875, "learning_rate": 1.5919191919191921e-06, "loss": 367.0379, "step": 1970 }, { "epoch": 0.016379203375108575, "grad_norm": 4604.79052734375, "learning_rate": 1.6000000000000001e-06, "loss": 462.412, "step": 1980 }, { "epoch": 0.016461926624477808, "grad_norm": 1806.8238525390625, "learning_rate": 1.6080808080808084e-06, "loss": 350.7875, "step": 1990 }, { "epoch": 0.016544649873847045, "grad_norm": 1519.6781005859375, "learning_rate": 1.6161616161616164e-06, "loss": 272.1075, "step": 2000 }, { "epoch": 0.01662737312321628, "grad_norm": 2001.111572265625, "learning_rate": 1.6242424242424242e-06, "loss": 264.7772, "step": 2010 }, { "epoch": 0.016710096372585515, "grad_norm": 2383.3994140625, "learning_rate": 1.6323232323232325e-06, "loss": 293.2167, "step": 2020 }, { "epoch": 0.016792819621954752, "grad_norm": 670.5611572265625, "learning_rate": 1.6404040404040405e-06, "loss": 379.9431, "step": 2030 }, { "epoch": 0.016875542871323986, "grad_norm": 1432.4300537109375, "learning_rate": 1.6484848484848487e-06, "loss": 362.749, "step": 2040 }, { "epoch": 0.016958266120693222, "grad_norm": 6461.77880859375, "learning_rate": 1.6565656565656567e-06, "loss": 407.5534, "step": 2050 }, { "epoch": 0.017040989370062456, "grad_norm": 1288.83544921875, "learning_rate": 1.6646464646464648e-06, "loss": 315.7526, "step": 2060 }, { "epoch": 0.017123712619431693, "grad_norm": 907.4447021484375, "learning_rate": 1.6727272727272728e-06, "loss": 292.4799, "step": 2070 }, { "epoch": 0.017206435868800926, "grad_norm": 1921.0576171875, "learning_rate": 1.680808080808081e-06, "loss": 297.3491, "step": 2080 }, { "epoch": 0.017289159118170163, "grad_norm": 1620.2481689453125, "learning_rate": 1.688888888888889e-06, "loss": 291.1318, "step": 2090 }, { "epoch": 0.017371882367539396, "grad_norm": 892.2626342773438, "learning_rate": 1.6969696969696973e-06, "loss": 337.7454, "step": 2100 }, { "epoch": 0.017454605616908633, "grad_norm": 1724.4583740234375, "learning_rate": 1.705050505050505e-06, "loss": 386.2352, "step": 2110 }, { "epoch": 0.017537328866277867, "grad_norm": 1373.0406494140625, "learning_rate": 1.7131313131313131e-06, "loss": 312.9512, "step": 2120 }, { "epoch": 0.017620052115647104, "grad_norm": 1802.06884765625, "learning_rate": 1.7212121212121214e-06, "loss": 392.6038, "step": 2130 }, { "epoch": 0.017702775365016337, "grad_norm": 1823.2747802734375, "learning_rate": 1.7292929292929294e-06, "loss": 276.9635, "step": 2140 }, { "epoch": 0.017785498614385574, "grad_norm": 2303.46484375, "learning_rate": 1.7373737373737376e-06, "loss": 363.1688, "step": 2150 }, { "epoch": 0.017868221863754807, "grad_norm": 939.9644165039062, "learning_rate": 1.7454545454545456e-06, "loss": 371.3494, "step": 2160 }, { "epoch": 0.017950945113124044, "grad_norm": 803.9099731445312, "learning_rate": 1.7535353535353537e-06, "loss": 325.9621, "step": 2170 }, { "epoch": 0.018033668362493278, "grad_norm": 1147.0989990234375, "learning_rate": 1.7616161616161617e-06, "loss": 339.9603, "step": 2180 }, { "epoch": 0.018116391611862515, "grad_norm": 1110.0518798828125, "learning_rate": 1.76969696969697e-06, "loss": 311.3122, "step": 2190 }, { "epoch": 0.018199114861231748, "grad_norm": 1060.744140625, "learning_rate": 1.777777777777778e-06, "loss": 350.8291, "step": 2200 }, { "epoch": 0.018281838110600985, "grad_norm": 2863.481201171875, "learning_rate": 1.7858585858585862e-06, "loss": 347.886, "step": 2210 }, { "epoch": 0.01836456135997022, "grad_norm": 1049.102294921875, "learning_rate": 1.793939393939394e-06, "loss": 255.8531, "step": 2220 }, { "epoch": 0.018447284609339455, "grad_norm": 1447.9163818359375, "learning_rate": 1.802020202020202e-06, "loss": 270.9402, "step": 2230 }, { "epoch": 0.01853000785870869, "grad_norm": 5666.75634765625, "learning_rate": 1.8101010101010103e-06, "loss": 265.4302, "step": 2240 }, { "epoch": 0.018612731108077925, "grad_norm": 901.1162109375, "learning_rate": 1.8181818181818183e-06, "loss": 327.2734, "step": 2250 }, { "epoch": 0.01869545435744716, "grad_norm": 1582.9307861328125, "learning_rate": 1.8262626262626265e-06, "loss": 295.4947, "step": 2260 }, { "epoch": 0.018778177606816396, "grad_norm": 910.0087280273438, "learning_rate": 1.8343434343434343e-06, "loss": 254.5832, "step": 2270 }, { "epoch": 0.018860900856185633, "grad_norm": 1553.7003173828125, "learning_rate": 1.8424242424242426e-06, "loss": 312.1798, "step": 2280 }, { "epoch": 0.018943624105554866, "grad_norm": 1657.673828125, "learning_rate": 1.8505050505050506e-06, "loss": 333.7471, "step": 2290 }, { "epoch": 0.019026347354924103, "grad_norm": 2194.053466796875, "learning_rate": 1.8585858585858588e-06, "loss": 252.629, "step": 2300 }, { "epoch": 0.019109070604293336, "grad_norm": 968.1193237304688, "learning_rate": 1.8666666666666669e-06, "loss": 317.2808, "step": 2310 }, { "epoch": 0.019191793853662573, "grad_norm": 2099.283203125, "learning_rate": 1.874747474747475e-06, "loss": 318.3145, "step": 2320 }, { "epoch": 0.019274517103031807, "grad_norm": 1018.1463012695312, "learning_rate": 1.882828282828283e-06, "loss": 310.5461, "step": 2330 }, { "epoch": 0.019357240352401044, "grad_norm": 1319.893310546875, "learning_rate": 1.890909090909091e-06, "loss": 403.0323, "step": 2340 }, { "epoch": 0.019439963601770277, "grad_norm": 3296.14404296875, "learning_rate": 1.8989898989898992e-06, "loss": 277.1991, "step": 2350 }, { "epoch": 0.019522686851139514, "grad_norm": 2120.728515625, "learning_rate": 1.9070707070707072e-06, "loss": 266.7476, "step": 2360 }, { "epoch": 0.019605410100508747, "grad_norm": 1736.4859619140625, "learning_rate": 1.9151515151515154e-06, "loss": 385.987, "step": 2370 }, { "epoch": 0.019688133349877984, "grad_norm": 1218.7796630859375, "learning_rate": 1.9232323232323235e-06, "loss": 337.6978, "step": 2380 }, { "epoch": 0.019770856599247218, "grad_norm": 4797.71337890625, "learning_rate": 1.9313131313131315e-06, "loss": 255.6176, "step": 2390 }, { "epoch": 0.019853579848616455, "grad_norm": 1956.2137451171875, "learning_rate": 1.9393939393939395e-06, "loss": 283.0651, "step": 2400 }, { "epoch": 0.019936303097985688, "grad_norm": 918.0914916992188, "learning_rate": 1.9474747474747475e-06, "loss": 312.682, "step": 2410 }, { "epoch": 0.020019026347354925, "grad_norm": 2107.04736328125, "learning_rate": 1.955555555555556e-06, "loss": 384.748, "step": 2420 }, { "epoch": 0.02010174959672416, "grad_norm": 861.828125, "learning_rate": 1.9636363636363636e-06, "loss": 340.8865, "step": 2430 }, { "epoch": 0.020184472846093395, "grad_norm": 6480.6171875, "learning_rate": 1.9717171717171716e-06, "loss": 416.4972, "step": 2440 }, { "epoch": 0.02026719609546263, "grad_norm": 1926.79296875, "learning_rate": 1.97979797979798e-06, "loss": 260.5365, "step": 2450 }, { "epoch": 0.020349919344831865, "grad_norm": 1225.735107421875, "learning_rate": 1.987878787878788e-06, "loss": 261.068, "step": 2460 }, { "epoch": 0.0204326425942011, "grad_norm": 857.5597534179688, "learning_rate": 1.995959595959596e-06, "loss": 241.7558, "step": 2470 }, { "epoch": 0.020515365843570336, "grad_norm": 42987.5546875, "learning_rate": 2.004040404040404e-06, "loss": 380.0271, "step": 2480 }, { "epoch": 0.02059808909293957, "grad_norm": 1689.1239013671875, "learning_rate": 2.012121212121212e-06, "loss": 390.1416, "step": 2490 }, { "epoch": 0.020680812342308806, "grad_norm": 4134.138671875, "learning_rate": 2.02020202020202e-06, "loss": 265.3958, "step": 2500 }, { "epoch": 0.02076353559167804, "grad_norm": 1039.45703125, "learning_rate": 2.0282828282828286e-06, "loss": 297.8323, "step": 2510 }, { "epoch": 0.020846258841047276, "grad_norm": 2275.5380859375, "learning_rate": 2.0363636363636367e-06, "loss": 326.8338, "step": 2520 }, { "epoch": 0.02092898209041651, "grad_norm": 1341.4542236328125, "learning_rate": 2.0444444444444447e-06, "loss": 255.4626, "step": 2530 }, { "epoch": 0.021011705339785747, "grad_norm": 1704.446044921875, "learning_rate": 2.0525252525252527e-06, "loss": 311.6551, "step": 2540 }, { "epoch": 0.021094428589154984, "grad_norm": 1272.9852294921875, "learning_rate": 2.0606060606060607e-06, "loss": 278.1134, "step": 2550 }, { "epoch": 0.021177151838524217, "grad_norm": 788.3895263671875, "learning_rate": 2.0686868686868688e-06, "loss": 282.1122, "step": 2560 }, { "epoch": 0.021259875087893454, "grad_norm": 1385.612548828125, "learning_rate": 2.0767676767676768e-06, "loss": 314.3244, "step": 2570 }, { "epoch": 0.021342598337262687, "grad_norm": 1955.3095703125, "learning_rate": 2.0848484848484852e-06, "loss": 262.7597, "step": 2580 }, { "epoch": 0.021425321586631924, "grad_norm": 1727.3680419921875, "learning_rate": 2.092929292929293e-06, "loss": 264.8611, "step": 2590 }, { "epoch": 0.021508044836001158, "grad_norm": 1702.6575927734375, "learning_rate": 2.1010101010101013e-06, "loss": 286.7952, "step": 2600 }, { "epoch": 0.021590768085370395, "grad_norm": 1000.8258666992188, "learning_rate": 2.1090909090909093e-06, "loss": 259.9386, "step": 2610 }, { "epoch": 0.021673491334739628, "grad_norm": 1225.5982666015625, "learning_rate": 2.1171717171717173e-06, "loss": 248.9738, "step": 2620 }, { "epoch": 0.021756214584108865, "grad_norm": 2182.8955078125, "learning_rate": 2.1252525252525254e-06, "loss": 290.0493, "step": 2630 }, { "epoch": 0.0218389378334781, "grad_norm": 946.896484375, "learning_rate": 2.133333333333334e-06, "loss": 285.7407, "step": 2640 }, { "epoch": 0.021921661082847335, "grad_norm": 965.7489624023438, "learning_rate": 2.1414141414141414e-06, "loss": 277.0386, "step": 2650 }, { "epoch": 0.02200438433221657, "grad_norm": 1855.605712890625, "learning_rate": 2.1494949494949494e-06, "loss": 308.2425, "step": 2660 }, { "epoch": 0.022087107581585805, "grad_norm": 945.2372436523438, "learning_rate": 2.157575757575758e-06, "loss": 314.9084, "step": 2670 }, { "epoch": 0.02216983083095504, "grad_norm": 2101.0771484375, "learning_rate": 2.165656565656566e-06, "loss": 277.5356, "step": 2680 }, { "epoch": 0.022252554080324276, "grad_norm": 931.0570678710938, "learning_rate": 2.173737373737374e-06, "loss": 380.9075, "step": 2690 }, { "epoch": 0.02233527732969351, "grad_norm": 849.65380859375, "learning_rate": 2.181818181818182e-06, "loss": 266.7404, "step": 2700 }, { "epoch": 0.022418000579062746, "grad_norm": 795.3492431640625, "learning_rate": 2.18989898989899e-06, "loss": 301.0031, "step": 2710 }, { "epoch": 0.02250072382843198, "grad_norm": 998.2992553710938, "learning_rate": 2.197979797979798e-06, "loss": 236.0826, "step": 2720 }, { "epoch": 0.022583447077801216, "grad_norm": 3029.24462890625, "learning_rate": 2.2060606060606064e-06, "loss": 284.6466, "step": 2730 }, { "epoch": 0.02266617032717045, "grad_norm": 991.4857177734375, "learning_rate": 2.2141414141414145e-06, "loss": 480.9545, "step": 2740 }, { "epoch": 0.022748893576539687, "grad_norm": 610.8831787109375, "learning_rate": 2.222222222222222e-06, "loss": 280.1348, "step": 2750 }, { "epoch": 0.02283161682590892, "grad_norm": 1224.61083984375, "learning_rate": 2.2303030303030305e-06, "loss": 287.0002, "step": 2760 }, { "epoch": 0.022914340075278157, "grad_norm": 967.6873779296875, "learning_rate": 2.2383838383838385e-06, "loss": 302.4269, "step": 2770 }, { "epoch": 0.02299706332464739, "grad_norm": 1249.24169921875, "learning_rate": 2.2464646464646466e-06, "loss": 324.5018, "step": 2780 }, { "epoch": 0.023079786574016627, "grad_norm": 2674.385498046875, "learning_rate": 2.254545454545455e-06, "loss": 253.8143, "step": 2790 }, { "epoch": 0.023162509823385864, "grad_norm": 1356.0936279296875, "learning_rate": 2.262626262626263e-06, "loss": 287.3596, "step": 2800 }, { "epoch": 0.023245233072755098, "grad_norm": 1003.8848266601562, "learning_rate": 2.2707070707070706e-06, "loss": 259.5809, "step": 2810 }, { "epoch": 0.023327956322124335, "grad_norm": 1609.186767578125, "learning_rate": 2.278787878787879e-06, "loss": 243.4104, "step": 2820 }, { "epoch": 0.023410679571493568, "grad_norm": 1286.5640869140625, "learning_rate": 2.286868686868687e-06, "loss": 303.2196, "step": 2830 }, { "epoch": 0.023493402820862805, "grad_norm": 1285.9228515625, "learning_rate": 2.294949494949495e-06, "loss": 274.7912, "step": 2840 }, { "epoch": 0.02357612607023204, "grad_norm": 1015.9469604492188, "learning_rate": 2.303030303030303e-06, "loss": 255.5114, "step": 2850 }, { "epoch": 0.023658849319601275, "grad_norm": 1524.3387451171875, "learning_rate": 2.311111111111111e-06, "loss": 394.2463, "step": 2860 }, { "epoch": 0.02374157256897051, "grad_norm": 4101.37548828125, "learning_rate": 2.3191919191919192e-06, "loss": 343.4752, "step": 2870 }, { "epoch": 0.023824295818339745, "grad_norm": 1837.815673828125, "learning_rate": 2.3272727272727277e-06, "loss": 306.1534, "step": 2880 }, { "epoch": 0.02390701906770898, "grad_norm": 1742.3822021484375, "learning_rate": 2.3353535353535357e-06, "loss": 382.2382, "step": 2890 }, { "epoch": 0.023989742317078216, "grad_norm": 922.0031127929688, "learning_rate": 2.3434343434343437e-06, "loss": 230.2729, "step": 2900 }, { "epoch": 0.02407246556644745, "grad_norm": 860.6764526367188, "learning_rate": 2.3515151515151517e-06, "loss": 257.8272, "step": 2910 }, { "epoch": 0.024155188815816686, "grad_norm": 3094.787841796875, "learning_rate": 2.3595959595959598e-06, "loss": 247.0388, "step": 2920 }, { "epoch": 0.02423791206518592, "grad_norm": 1400.10009765625, "learning_rate": 2.367676767676768e-06, "loss": 350.3922, "step": 2930 }, { "epoch": 0.024320635314555156, "grad_norm": 1306.7431640625, "learning_rate": 2.375757575757576e-06, "loss": 318.7807, "step": 2940 }, { "epoch": 0.02440335856392439, "grad_norm": 859.3858032226562, "learning_rate": 2.3838383838383843e-06, "loss": 292.0828, "step": 2950 }, { "epoch": 0.024486081813293627, "grad_norm": 1207.4444580078125, "learning_rate": 2.3919191919191923e-06, "loss": 268.6456, "step": 2960 }, { "epoch": 0.02456880506266286, "grad_norm": 1388.7242431640625, "learning_rate": 2.4000000000000003e-06, "loss": 327.8167, "step": 2970 }, { "epoch": 0.024651528312032097, "grad_norm": 1601.5413818359375, "learning_rate": 2.4080808080808083e-06, "loss": 217.4461, "step": 2980 }, { "epoch": 0.02473425156140133, "grad_norm": 1094.072509765625, "learning_rate": 2.4161616161616164e-06, "loss": 237.2505, "step": 2990 }, { "epoch": 0.024816974810770567, "grad_norm": 1064.4403076171875, "learning_rate": 2.4242424242424244e-06, "loss": 274.2577, "step": 3000 }, { "epoch": 0.0248996980601398, "grad_norm": 2712.60986328125, "learning_rate": 2.432323232323233e-06, "loss": 336.2089, "step": 3010 }, { "epoch": 0.024982421309509038, "grad_norm": 657.0144653320312, "learning_rate": 2.4404040404040404e-06, "loss": 285.5267, "step": 3020 }, { "epoch": 0.02506514455887827, "grad_norm": 1818.3843994140625, "learning_rate": 2.4484848484848485e-06, "loss": 345.3333, "step": 3030 }, { "epoch": 0.025147867808247508, "grad_norm": 1924.058349609375, "learning_rate": 2.456565656565657e-06, "loss": 254.806, "step": 3040 }, { "epoch": 0.025230591057616745, "grad_norm": 926.4883422851562, "learning_rate": 2.464646464646465e-06, "loss": 248.8595, "step": 3050 }, { "epoch": 0.025313314306985978, "grad_norm": 1217.055419921875, "learning_rate": 2.472727272727273e-06, "loss": 258.9547, "step": 3060 }, { "epoch": 0.025396037556355215, "grad_norm": 837.9513549804688, "learning_rate": 2.480808080808081e-06, "loss": 239.7319, "step": 3070 }, { "epoch": 0.02547876080572445, "grad_norm": 702.333740234375, "learning_rate": 2.488888888888889e-06, "loss": 290.1943, "step": 3080 }, { "epoch": 0.025561484055093685, "grad_norm": 1217.6317138671875, "learning_rate": 2.496969696969697e-06, "loss": 282.1116, "step": 3090 }, { "epoch": 0.02564420730446292, "grad_norm": 1642.226806640625, "learning_rate": 2.5050505050505055e-06, "loss": 242.3538, "step": 3100 }, { "epoch": 0.025726930553832156, "grad_norm": 1036.6883544921875, "learning_rate": 2.5131313131313135e-06, "loss": 339.3384, "step": 3110 }, { "epoch": 0.02580965380320139, "grad_norm": 2547.31396484375, "learning_rate": 2.5212121212121215e-06, "loss": 268.7113, "step": 3120 }, { "epoch": 0.025892377052570626, "grad_norm": 2499.7900390625, "learning_rate": 2.5292929292929296e-06, "loss": 371.6728, "step": 3130 }, { "epoch": 0.02597510030193986, "grad_norm": 835.4087524414062, "learning_rate": 2.537373737373738e-06, "loss": 331.6685, "step": 3140 }, { "epoch": 0.026057823551309096, "grad_norm": 1723.9932861328125, "learning_rate": 2.5454545454545456e-06, "loss": 241.2475, "step": 3150 }, { "epoch": 0.02614054680067833, "grad_norm": 892.8982543945312, "learning_rate": 2.5535353535353536e-06, "loss": 239.1675, "step": 3160 }, { "epoch": 0.026223270050047567, "grad_norm": 1679.0596923828125, "learning_rate": 2.5616161616161617e-06, "loss": 251.3017, "step": 3170 }, { "epoch": 0.0263059932994168, "grad_norm": 1314.4605712890625, "learning_rate": 2.5696969696969697e-06, "loss": 291.6142, "step": 3180 }, { "epoch": 0.026388716548786037, "grad_norm": 1959.02490234375, "learning_rate": 2.577777777777778e-06, "loss": 335.8238, "step": 3190 }, { "epoch": 0.02647143979815527, "grad_norm": 1912.5706787109375, "learning_rate": 2.585858585858586e-06, "loss": 374.3031, "step": 3200 }, { "epoch": 0.026554163047524507, "grad_norm": 3296.021728515625, "learning_rate": 2.593939393939394e-06, "loss": 258.0628, "step": 3210 }, { "epoch": 0.02663688629689374, "grad_norm": 1115.4268798828125, "learning_rate": 2.602020202020202e-06, "loss": 334.8846, "step": 3220 }, { "epoch": 0.026719609546262978, "grad_norm": 973.0841064453125, "learning_rate": 2.6101010101010107e-06, "loss": 275.7638, "step": 3230 }, { "epoch": 0.02680233279563221, "grad_norm": 1405.8780517578125, "learning_rate": 2.6181818181818187e-06, "loss": 290.556, "step": 3240 }, { "epoch": 0.026885056045001448, "grad_norm": 2512.458740234375, "learning_rate": 2.6262626262626267e-06, "loss": 255.2991, "step": 3250 }, { "epoch": 0.02696777929437068, "grad_norm": 1015.4577026367188, "learning_rate": 2.6343434343434343e-06, "loss": 293.1441, "step": 3260 }, { "epoch": 0.027050502543739918, "grad_norm": 1590.4383544921875, "learning_rate": 2.6424242424242423e-06, "loss": 315.3852, "step": 3270 }, { "epoch": 0.02713322579310915, "grad_norm": 1241.1497802734375, "learning_rate": 2.6505050505050508e-06, "loss": 279.314, "step": 3280 }, { "epoch": 0.02721594904247839, "grad_norm": 803.2537841796875, "learning_rate": 2.658585858585859e-06, "loss": 237.7475, "step": 3290 }, { "epoch": 0.027298672291847625, "grad_norm": 1308.884765625, "learning_rate": 2.666666666666667e-06, "loss": 289.3862, "step": 3300 }, { "epoch": 0.02738139554121686, "grad_norm": 1037.373291015625, "learning_rate": 2.674747474747475e-06, "loss": 278.7418, "step": 3310 }, { "epoch": 0.027464118790586096, "grad_norm": 1414.7493896484375, "learning_rate": 2.6828282828282833e-06, "loss": 388.9876, "step": 3320 }, { "epoch": 0.02754684203995533, "grad_norm": 1686.8880615234375, "learning_rate": 2.6909090909090913e-06, "loss": 224.3872, "step": 3330 }, { "epoch": 0.027629565289324566, "grad_norm": 1130.1907958984375, "learning_rate": 2.6989898989898994e-06, "loss": 219.274, "step": 3340 }, { "epoch": 0.0277122885386938, "grad_norm": 1604.05419921875, "learning_rate": 2.7070707070707074e-06, "loss": 313.1014, "step": 3350 }, { "epoch": 0.027795011788063036, "grad_norm": 1515.8199462890625, "learning_rate": 2.715151515151516e-06, "loss": 351.9699, "step": 3360 }, { "epoch": 0.02787773503743227, "grad_norm": 1105.636474609375, "learning_rate": 2.7232323232323234e-06, "loss": 258.1985, "step": 3370 }, { "epoch": 0.027960458286801507, "grad_norm": 2343.5537109375, "learning_rate": 2.7313131313131315e-06, "loss": 247.2701, "step": 3380 }, { "epoch": 0.02804318153617074, "grad_norm": 3364.70068359375, "learning_rate": 2.7393939393939395e-06, "loss": 320.3825, "step": 3390 }, { "epoch": 0.028125904785539977, "grad_norm": 1226.61279296875, "learning_rate": 2.7474747474747475e-06, "loss": 308.3539, "step": 3400 }, { "epoch": 0.02820862803490921, "grad_norm": 1365.4581298828125, "learning_rate": 2.755555555555556e-06, "loss": 265.8813, "step": 3410 }, { "epoch": 0.028291351284278447, "grad_norm": 1471.4437255859375, "learning_rate": 2.763636363636364e-06, "loss": 246.5465, "step": 3420 }, { "epoch": 0.02837407453364768, "grad_norm": 1522.9873046875, "learning_rate": 2.771717171717172e-06, "loss": 294.6199, "step": 3430 }, { "epoch": 0.028456797783016918, "grad_norm": 2295.73095703125, "learning_rate": 2.77979797979798e-06, "loss": 471.7754, "step": 3440 }, { "epoch": 0.02853952103238615, "grad_norm": 1030.5484619140625, "learning_rate": 2.7878787878787885e-06, "loss": 315.6531, "step": 3450 }, { "epoch": 0.028622244281755388, "grad_norm": 1534.0970458984375, "learning_rate": 2.7959595959595965e-06, "loss": 236.7922, "step": 3460 }, { "epoch": 0.02870496753112462, "grad_norm": 1193.41748046875, "learning_rate": 2.804040404040404e-06, "loss": 301.1876, "step": 3470 }, { "epoch": 0.028787690780493858, "grad_norm": 884.4568481445312, "learning_rate": 2.812121212121212e-06, "loss": 248.0099, "step": 3480 }, { "epoch": 0.02887041402986309, "grad_norm": 1417.0870361328125, "learning_rate": 2.82020202020202e-06, "loss": 214.0175, "step": 3490 }, { "epoch": 0.02895313727923233, "grad_norm": 906.0728149414062, "learning_rate": 2.8282828282828286e-06, "loss": 337.6909, "step": 3500 }, { "epoch": 0.029035860528601562, "grad_norm": 720.5711059570312, "learning_rate": 2.8363636363636366e-06, "loss": 237.9402, "step": 3510 }, { "epoch": 0.0291185837779708, "grad_norm": 2159.7626953125, "learning_rate": 2.8444444444444446e-06, "loss": 290.2204, "step": 3520 }, { "epoch": 0.029201307027340032, "grad_norm": 1583.6937255859375, "learning_rate": 2.8525252525252527e-06, "loss": 339.3829, "step": 3530 }, { "epoch": 0.02928403027670927, "grad_norm": 1711.7625732421875, "learning_rate": 2.860606060606061e-06, "loss": 338.8203, "step": 3540 }, { "epoch": 0.029366753526078506, "grad_norm": 1867.3909912109375, "learning_rate": 2.868686868686869e-06, "loss": 295.4354, "step": 3550 }, { "epoch": 0.02944947677544774, "grad_norm": 1350.51708984375, "learning_rate": 2.876767676767677e-06, "loss": 255.1478, "step": 3560 }, { "epoch": 0.029532200024816976, "grad_norm": 1835.9559326171875, "learning_rate": 2.884848484848485e-06, "loss": 235.7582, "step": 3570 }, { "epoch": 0.02961492327418621, "grad_norm": 1273.539794921875, "learning_rate": 2.892929292929293e-06, "loss": 305.7688, "step": 3580 }, { "epoch": 0.029697646523555447, "grad_norm": 1092.1051025390625, "learning_rate": 2.9010101010101012e-06, "loss": 229.7812, "step": 3590 }, { "epoch": 0.02978036977292468, "grad_norm": 1232.7423095703125, "learning_rate": 2.9090909090909093e-06, "loss": 308.136, "step": 3600 }, { "epoch": 0.029863093022293917, "grad_norm": 860.4628295898438, "learning_rate": 2.9171717171717173e-06, "loss": 245.5094, "step": 3610 }, { "epoch": 0.02994581627166315, "grad_norm": 1105.772705078125, "learning_rate": 2.9252525252525253e-06, "loss": 225.044, "step": 3620 }, { "epoch": 0.030028539521032387, "grad_norm": 1092.992431640625, "learning_rate": 2.9333333333333338e-06, "loss": 240.9013, "step": 3630 }, { "epoch": 0.03011126277040162, "grad_norm": 1877.3671875, "learning_rate": 2.941414141414142e-06, "loss": 304.4779, "step": 3640 }, { "epoch": 0.030193986019770858, "grad_norm": 1553.412109375, "learning_rate": 2.94949494949495e-06, "loss": 253.1373, "step": 3650 }, { "epoch": 0.03027670926914009, "grad_norm": 1163.033447265625, "learning_rate": 2.957575757575758e-06, "loss": 272.4007, "step": 3660 }, { "epoch": 0.030359432518509328, "grad_norm": 1105.1661376953125, "learning_rate": 2.9656565656565663e-06, "loss": 243.5992, "step": 3670 }, { "epoch": 0.03044215576787856, "grad_norm": 1370.2781982421875, "learning_rate": 2.9737373737373743e-06, "loss": 232.3612, "step": 3680 }, { "epoch": 0.030524879017247798, "grad_norm": 925.8299560546875, "learning_rate": 2.981818181818182e-06, "loss": 194.9076, "step": 3690 }, { "epoch": 0.03060760226661703, "grad_norm": 4565.89208984375, "learning_rate": 2.98989898989899e-06, "loss": 317.9731, "step": 3700 }, { "epoch": 0.03069032551598627, "grad_norm": 1785.587890625, "learning_rate": 2.997979797979798e-06, "loss": 261.7598, "step": 3710 }, { "epoch": 0.030773048765355502, "grad_norm": 1177.1761474609375, "learning_rate": 3.0060606060606064e-06, "loss": 211.625, "step": 3720 }, { "epoch": 0.03085577201472474, "grad_norm": 980.7220458984375, "learning_rate": 3.0141414141414144e-06, "loss": 198.16, "step": 3730 }, { "epoch": 0.030938495264093972, "grad_norm": 1450.34033203125, "learning_rate": 3.0222222222222225e-06, "loss": 281.4796, "step": 3740 }, { "epoch": 0.03102121851346321, "grad_norm": 917.07275390625, "learning_rate": 3.0303030303030305e-06, "loss": 266.2501, "step": 3750 }, { "epoch": 0.031103941762832443, "grad_norm": 1657.814697265625, "learning_rate": 3.038383838383839e-06, "loss": 279.4682, "step": 3760 }, { "epoch": 0.03118666501220168, "grad_norm": 1294.77685546875, "learning_rate": 3.046464646464647e-06, "loss": 259.5207, "step": 3770 }, { "epoch": 0.03126938826157091, "grad_norm": 1622.716796875, "learning_rate": 3.054545454545455e-06, "loss": 298.1313, "step": 3780 }, { "epoch": 0.031352111510940146, "grad_norm": 1660.8116455078125, "learning_rate": 3.0626262626262626e-06, "loss": 250.9933, "step": 3790 }, { "epoch": 0.03143483476030939, "grad_norm": 799.0031127929688, "learning_rate": 3.0707070707070706e-06, "loss": 310.6956, "step": 3800 }, { "epoch": 0.03151755800967862, "grad_norm": 1199.9678955078125, "learning_rate": 3.078787878787879e-06, "loss": 283.1158, "step": 3810 }, { "epoch": 0.031600281259047854, "grad_norm": 1449.6212158203125, "learning_rate": 3.086868686868687e-06, "loss": 249.93, "step": 3820 }, { "epoch": 0.031683004508417094, "grad_norm": 1299.2960205078125, "learning_rate": 3.094949494949495e-06, "loss": 224.7813, "step": 3830 }, { "epoch": 0.03176572775778633, "grad_norm": 2239.721435546875, "learning_rate": 3.103030303030303e-06, "loss": 223.2453, "step": 3840 }, { "epoch": 0.03184845100715556, "grad_norm": 1760.5396728515625, "learning_rate": 3.1111111111111116e-06, "loss": 252.7971, "step": 3850 }, { "epoch": 0.031931174256524794, "grad_norm": 682.6892700195312, "learning_rate": 3.1191919191919196e-06, "loss": 216.5466, "step": 3860 }, { "epoch": 0.032013897505894034, "grad_norm": 2052.32861328125, "learning_rate": 3.1272727272727276e-06, "loss": 288.775, "step": 3870 }, { "epoch": 0.03209662075526327, "grad_norm": 1602.2225341796875, "learning_rate": 3.1353535353535357e-06, "loss": 328.757, "step": 3880 }, { "epoch": 0.0321793440046325, "grad_norm": 681.2459106445312, "learning_rate": 3.143434343434344e-06, "loss": 190.3252, "step": 3890 }, { "epoch": 0.032262067254001735, "grad_norm": 1182.54052734375, "learning_rate": 3.1515151515151517e-06, "loss": 268.2503, "step": 3900 }, { "epoch": 0.032344790503370975, "grad_norm": 1219.3660888671875, "learning_rate": 3.1595959595959597e-06, "loss": 239.1843, "step": 3910 }, { "epoch": 0.03242751375274021, "grad_norm": 1041.707275390625, "learning_rate": 3.1676767676767678e-06, "loss": 273.016, "step": 3920 }, { "epoch": 0.03251023700210944, "grad_norm": 1269.7681884765625, "learning_rate": 3.1757575757575758e-06, "loss": 249.6547, "step": 3930 }, { "epoch": 0.032592960251478675, "grad_norm": 1372.903564453125, "learning_rate": 3.1838383838383842e-06, "loss": 311.8126, "step": 3940 }, { "epoch": 0.032675683500847916, "grad_norm": 2617.830322265625, "learning_rate": 3.1919191919191923e-06, "loss": 299.153, "step": 3950 }, { "epoch": 0.03275840675021715, "grad_norm": 1350.3302001953125, "learning_rate": 3.2000000000000003e-06, "loss": 238.4413, "step": 3960 }, { "epoch": 0.03284112999958638, "grad_norm": 2270.457763671875, "learning_rate": 3.2080808080808083e-06, "loss": 267.5316, "step": 3970 }, { "epoch": 0.032923853248955616, "grad_norm": 1409.05712890625, "learning_rate": 3.2161616161616168e-06, "loss": 237.9799, "step": 3980 }, { "epoch": 0.033006576498324856, "grad_norm": 1641.050537109375, "learning_rate": 3.2242424242424248e-06, "loss": 352.6279, "step": 3990 }, { "epoch": 0.03308929974769409, "grad_norm": 1178.6591796875, "learning_rate": 3.232323232323233e-06, "loss": 200.0928, "step": 4000 }, { "epoch": 0.03317202299706332, "grad_norm": 895.1983032226562, "learning_rate": 3.2404040404040404e-06, "loss": 274.1366, "step": 4010 }, { "epoch": 0.03325474624643256, "grad_norm": 2193.698486328125, "learning_rate": 3.2484848484848484e-06, "loss": 205.6771, "step": 4020 }, { "epoch": 0.0333374694958018, "grad_norm": 1705.0743408203125, "learning_rate": 3.256565656565657e-06, "loss": 253.3518, "step": 4030 }, { "epoch": 0.03342019274517103, "grad_norm": 1552.232666015625, "learning_rate": 3.264646464646465e-06, "loss": 243.4265, "step": 4040 }, { "epoch": 0.033502915994540264, "grad_norm": 4037.46875, "learning_rate": 3.272727272727273e-06, "loss": 229.4004, "step": 4050 }, { "epoch": 0.033585639243909504, "grad_norm": 3386.515869140625, "learning_rate": 3.280808080808081e-06, "loss": 338.8573, "step": 4060 }, { "epoch": 0.03366836249327874, "grad_norm": 1145.6063232421875, "learning_rate": 3.2888888888888894e-06, "loss": 254.9263, "step": 4070 }, { "epoch": 0.03375108574264797, "grad_norm": 2452.519775390625, "learning_rate": 3.2969696969696974e-06, "loss": 365.3762, "step": 4080 }, { "epoch": 0.033833808992017204, "grad_norm": 1752.8304443359375, "learning_rate": 3.3050505050505054e-06, "loss": 242.0698, "step": 4090 }, { "epoch": 0.033916532241386445, "grad_norm": 1975.453857421875, "learning_rate": 3.3131313131313135e-06, "loss": 294.2446, "step": 4100 }, { "epoch": 0.03399925549075568, "grad_norm": 1159.05615234375, "learning_rate": 3.321212121212121e-06, "loss": 254.9454, "step": 4110 }, { "epoch": 0.03408197874012491, "grad_norm": 904.747802734375, "learning_rate": 3.3292929292929295e-06, "loss": 291.38, "step": 4120 }, { "epoch": 0.034164701989494145, "grad_norm": 1563.5689697265625, "learning_rate": 3.3373737373737375e-06, "loss": 212.5943, "step": 4130 }, { "epoch": 0.034247425238863385, "grad_norm": 1204.7821044921875, "learning_rate": 3.3454545454545456e-06, "loss": 265.8186, "step": 4140 }, { "epoch": 0.03433014848823262, "grad_norm": 904.9209594726562, "learning_rate": 3.3535353535353536e-06, "loss": 260.1104, "step": 4150 }, { "epoch": 0.03441287173760185, "grad_norm": 1248.727783203125, "learning_rate": 3.361616161616162e-06, "loss": 291.2336, "step": 4160 }, { "epoch": 0.034495594986971086, "grad_norm": 893.3284912109375, "learning_rate": 3.36969696969697e-06, "loss": 233.5644, "step": 4170 }, { "epoch": 0.034578318236340326, "grad_norm": 1158.58984375, "learning_rate": 3.377777777777778e-06, "loss": 246.782, "step": 4180 }, { "epoch": 0.03466104148570956, "grad_norm": 1969.4088134765625, "learning_rate": 3.385858585858586e-06, "loss": 237.9205, "step": 4190 }, { "epoch": 0.03474376473507879, "grad_norm": 1035.88427734375, "learning_rate": 3.3939393939393946e-06, "loss": 321.7581, "step": 4200 }, { "epoch": 0.034826487984448026, "grad_norm": 918.1465454101562, "learning_rate": 3.4020202020202026e-06, "loss": 272.7782, "step": 4210 }, { "epoch": 0.03490921123381727, "grad_norm": 1070.0615234375, "learning_rate": 3.41010101010101e-06, "loss": 224.1239, "step": 4220 }, { "epoch": 0.0349919344831865, "grad_norm": 1230.9432373046875, "learning_rate": 3.4181818181818182e-06, "loss": 276.4893, "step": 4230 }, { "epoch": 0.035074657732555733, "grad_norm": 1131.748046875, "learning_rate": 3.4262626262626262e-06, "loss": 217.2287, "step": 4240 }, { "epoch": 0.03515738098192497, "grad_norm": 2427.749267578125, "learning_rate": 3.4343434343434347e-06, "loss": 251.6459, "step": 4250 }, { "epoch": 0.03524010423129421, "grad_norm": 1668.80810546875, "learning_rate": 3.4424242424242427e-06, "loss": 317.6187, "step": 4260 }, { "epoch": 0.03532282748066344, "grad_norm": 372.29510498046875, "learning_rate": 3.4505050505050507e-06, "loss": 179.444, "step": 4270 }, { "epoch": 0.035405550730032674, "grad_norm": 1011.8233032226562, "learning_rate": 3.4585858585858588e-06, "loss": 219.8903, "step": 4280 }, { "epoch": 0.03548827397940191, "grad_norm": 1418.88232421875, "learning_rate": 3.4666666666666672e-06, "loss": 286.1539, "step": 4290 }, { "epoch": 0.03557099722877115, "grad_norm": 2228.23095703125, "learning_rate": 3.4747474747474752e-06, "loss": 293.0923, "step": 4300 }, { "epoch": 0.03565372047814038, "grad_norm": 715.6218872070312, "learning_rate": 3.4828282828282833e-06, "loss": 282.1017, "step": 4310 }, { "epoch": 0.035736443727509615, "grad_norm": 742.7416381835938, "learning_rate": 3.4909090909090913e-06, "loss": 204.9258, "step": 4320 }, { "epoch": 0.035819166976878855, "grad_norm": 1228.9534912109375, "learning_rate": 3.498989898989899e-06, "loss": 259.2133, "step": 4330 }, { "epoch": 0.03590189022624809, "grad_norm": 967.5274047851562, "learning_rate": 3.5070707070707073e-06, "loss": 288.3385, "step": 4340 }, { "epoch": 0.03598461347561732, "grad_norm": 1587.67724609375, "learning_rate": 3.5151515151515154e-06, "loss": 277.6203, "step": 4350 }, { "epoch": 0.036067336724986555, "grad_norm": 1431.6448974609375, "learning_rate": 3.5232323232323234e-06, "loss": 213.064, "step": 4360 }, { "epoch": 0.036150059974355796, "grad_norm": 1135.779052734375, "learning_rate": 3.5313131313131314e-06, "loss": 221.4357, "step": 4370 }, { "epoch": 0.03623278322372503, "grad_norm": 1519.3890380859375, "learning_rate": 3.53939393939394e-06, "loss": 277.6575, "step": 4380 }, { "epoch": 0.03631550647309426, "grad_norm": 1360.25048828125, "learning_rate": 3.547474747474748e-06, "loss": 195.2055, "step": 4390 }, { "epoch": 0.036398229722463496, "grad_norm": 2059.44287109375, "learning_rate": 3.555555555555556e-06, "loss": 324.8405, "step": 4400 }, { "epoch": 0.036480952971832736, "grad_norm": 780.7919921875, "learning_rate": 3.563636363636364e-06, "loss": 212.4319, "step": 4410 }, { "epoch": 0.03656367622120197, "grad_norm": 1413.5367431640625, "learning_rate": 3.5717171717171724e-06, "loss": 255.6041, "step": 4420 }, { "epoch": 0.0366463994705712, "grad_norm": 637.59521484375, "learning_rate": 3.57979797979798e-06, "loss": 193.1592, "step": 4430 }, { "epoch": 0.03672912271994044, "grad_norm": 1240.098876953125, "learning_rate": 3.587878787878788e-06, "loss": 331.8386, "step": 4440 }, { "epoch": 0.03681184596930968, "grad_norm": 980.0120239257812, "learning_rate": 3.595959595959596e-06, "loss": 276.4505, "step": 4450 }, { "epoch": 0.03689456921867891, "grad_norm": 2805.927001953125, "learning_rate": 3.604040404040404e-06, "loss": 269.0381, "step": 4460 }, { "epoch": 0.036977292468048144, "grad_norm": 2359.113525390625, "learning_rate": 3.6121212121212125e-06, "loss": 258.4454, "step": 4470 }, { "epoch": 0.03706001571741738, "grad_norm": 1293.63720703125, "learning_rate": 3.6202020202020205e-06, "loss": 225.2379, "step": 4480 }, { "epoch": 0.03714273896678662, "grad_norm": 2243.848876953125, "learning_rate": 3.6282828282828286e-06, "loss": 213.6965, "step": 4490 }, { "epoch": 0.03722546221615585, "grad_norm": 770.7854614257812, "learning_rate": 3.6363636363636366e-06, "loss": 231.6815, "step": 4500 }, { "epoch": 0.037308185465525084, "grad_norm": 1308.199462890625, "learning_rate": 3.644444444444445e-06, "loss": 232.9941, "step": 4510 }, { "epoch": 0.03739090871489432, "grad_norm": 883.5875244140625, "learning_rate": 3.652525252525253e-06, "loss": 216.6295, "step": 4520 }, { "epoch": 0.03747363196426356, "grad_norm": 672.0443115234375, "learning_rate": 3.660606060606061e-06, "loss": 201.8103, "step": 4530 }, { "epoch": 0.03755635521363279, "grad_norm": 1314.2528076171875, "learning_rate": 3.6686868686868687e-06, "loss": 256.3278, "step": 4540 }, { "epoch": 0.037639078463002025, "grad_norm": 1359.0018310546875, "learning_rate": 3.6767676767676767e-06, "loss": 216.419, "step": 4550 }, { "epoch": 0.037721801712371265, "grad_norm": 1884.207763671875, "learning_rate": 3.684848484848485e-06, "loss": 211.731, "step": 4560 }, { "epoch": 0.0378045249617405, "grad_norm": 1213.398193359375, "learning_rate": 3.692929292929293e-06, "loss": 272.2958, "step": 4570 }, { "epoch": 0.03788724821110973, "grad_norm": 2207.1171875, "learning_rate": 3.701010101010101e-06, "loss": 265.4331, "step": 4580 }, { "epoch": 0.037969971460478966, "grad_norm": 1944.73876953125, "learning_rate": 3.7090909090909092e-06, "loss": 272.1659, "step": 4590 }, { "epoch": 0.038052694709848206, "grad_norm": 1226.4189453125, "learning_rate": 3.7171717171717177e-06, "loss": 226.3827, "step": 4600 }, { "epoch": 0.03813541795921744, "grad_norm": 1100.8829345703125, "learning_rate": 3.7252525252525257e-06, "loss": 221.5738, "step": 4610 }, { "epoch": 0.03821814120858667, "grad_norm": 1859.399169921875, "learning_rate": 3.7333333333333337e-06, "loss": 229.0128, "step": 4620 }, { "epoch": 0.038300864457955906, "grad_norm": 1107.61572265625, "learning_rate": 3.7414141414141418e-06, "loss": 224.0653, "step": 4630 }, { "epoch": 0.03838358770732515, "grad_norm": 1617.0789794921875, "learning_rate": 3.74949494949495e-06, "loss": 240.6666, "step": 4640 }, { "epoch": 0.03846631095669438, "grad_norm": 2554.2158203125, "learning_rate": 3.757575757575758e-06, "loss": 258.9923, "step": 4650 }, { "epoch": 0.038549034206063613, "grad_norm": 3580.094482421875, "learning_rate": 3.765656565656566e-06, "loss": 270.2465, "step": 4660 }, { "epoch": 0.03863175745543285, "grad_norm": 1391.0946044921875, "learning_rate": 3.773737373737374e-06, "loss": 184.8681, "step": 4670 }, { "epoch": 0.03871448070480209, "grad_norm": 1413.2205810546875, "learning_rate": 3.781818181818182e-06, "loss": 210.8876, "step": 4680 }, { "epoch": 0.03879720395417132, "grad_norm": 1123.7315673828125, "learning_rate": 3.7898989898989903e-06, "loss": 226.6469, "step": 4690 }, { "epoch": 0.038879927203540554, "grad_norm": 1507.7408447265625, "learning_rate": 3.7979797979797984e-06, "loss": 243.835, "step": 4700 }, { "epoch": 0.03896265045290979, "grad_norm": 1154.9927978515625, "learning_rate": 3.8060606060606064e-06, "loss": 275.5142, "step": 4710 }, { "epoch": 0.03904537370227903, "grad_norm": 1263.9693603515625, "learning_rate": 3.8141414141414144e-06, "loss": 208.4059, "step": 4720 }, { "epoch": 0.03912809695164826, "grad_norm": 2182.939208984375, "learning_rate": 3.8222222222222224e-06, "loss": 225.735, "step": 4730 }, { "epoch": 0.039210820201017495, "grad_norm": 1238.331298828125, "learning_rate": 3.830303030303031e-06, "loss": 215.773, "step": 4740 }, { "epoch": 0.03929354345038673, "grad_norm": 1922.897216796875, "learning_rate": 3.8383838383838385e-06, "loss": 205.833, "step": 4750 }, { "epoch": 0.03937626669975597, "grad_norm": 1248.5205078125, "learning_rate": 3.846464646464647e-06, "loss": 212.8604, "step": 4760 }, { "epoch": 0.0394589899491252, "grad_norm": 1999.810791015625, "learning_rate": 3.8545454545454545e-06, "loss": 234.7124, "step": 4770 }, { "epoch": 0.039541713198494435, "grad_norm": 1900.56787109375, "learning_rate": 3.862626262626263e-06, "loss": 206.3176, "step": 4780 }, { "epoch": 0.03962443644786367, "grad_norm": 1776.326171875, "learning_rate": 3.8707070707070706e-06, "loss": 235.4771, "step": 4790 }, { "epoch": 0.03970715969723291, "grad_norm": 1206.546630859375, "learning_rate": 3.878787878787879e-06, "loss": 232.6159, "step": 4800 }, { "epoch": 0.03978988294660214, "grad_norm": 1193.0535888671875, "learning_rate": 3.8868686868686875e-06, "loss": 239.4891, "step": 4810 }, { "epoch": 0.039872606195971376, "grad_norm": 1266.81103515625, "learning_rate": 3.894949494949495e-06, "loss": 264.1928, "step": 4820 }, { "epoch": 0.039955329445340616, "grad_norm": 1152.444580078125, "learning_rate": 3.9030303030303035e-06, "loss": 197.2479, "step": 4830 }, { "epoch": 0.04003805269470985, "grad_norm": 1796.5777587890625, "learning_rate": 3.911111111111112e-06, "loss": 232.1052, "step": 4840 }, { "epoch": 0.04012077594407908, "grad_norm": 1190.642822265625, "learning_rate": 3.9191919191919196e-06, "loss": 246.6287, "step": 4850 }, { "epoch": 0.04020349919344832, "grad_norm": 1134.314453125, "learning_rate": 3.927272727272727e-06, "loss": 268.4593, "step": 4860 }, { "epoch": 0.04028622244281756, "grad_norm": 1696.9732666015625, "learning_rate": 3.935353535353536e-06, "loss": 226.5233, "step": 4870 }, { "epoch": 0.04036894569218679, "grad_norm": 1237.491455078125, "learning_rate": 3.943434343434343e-06, "loss": 196.7495, "step": 4880 }, { "epoch": 0.040451668941556024, "grad_norm": 2431.97265625, "learning_rate": 3.951515151515152e-06, "loss": 220.9505, "step": 4890 }, { "epoch": 0.04053439219092526, "grad_norm": 1421.2952880859375, "learning_rate": 3.95959595959596e-06, "loss": 253.9248, "step": 4900 }, { "epoch": 0.0406171154402945, "grad_norm": 1305.2431640625, "learning_rate": 3.967676767676768e-06, "loss": 302.1668, "step": 4910 }, { "epoch": 0.04069983868966373, "grad_norm": 1975.3135986328125, "learning_rate": 3.975757575757576e-06, "loss": 282.536, "step": 4920 }, { "epoch": 0.040782561939032964, "grad_norm": 1655.1588134765625, "learning_rate": 3.983838383838385e-06, "loss": 217.2957, "step": 4930 }, { "epoch": 0.0408652851884022, "grad_norm": 1051.86083984375, "learning_rate": 3.991919191919192e-06, "loss": 240.2383, "step": 4940 }, { "epoch": 0.04094800843777144, "grad_norm": 773.5546875, "learning_rate": 4.000000000000001e-06, "loss": 221.8524, "step": 4950 }, { "epoch": 0.04103073168714067, "grad_norm": 1287.1944580078125, "learning_rate": 4.008080808080808e-06, "loss": 229.3614, "step": 4960 }, { "epoch": 0.041113454936509905, "grad_norm": 1897.0220947265625, "learning_rate": 4.016161616161616e-06, "loss": 233.9596, "step": 4970 }, { "epoch": 0.04119617818587914, "grad_norm": 1463.894287109375, "learning_rate": 4.024242424242424e-06, "loss": 205.995, "step": 4980 }, { "epoch": 0.04127890143524838, "grad_norm": 981.1106567382812, "learning_rate": 4.032323232323233e-06, "loss": 226.2633, "step": 4990 }, { "epoch": 0.04136162468461761, "grad_norm": 3404.24365234375, "learning_rate": 4.04040404040404e-06, "loss": 249.0778, "step": 5000 }, { "epoch": 0.041444347933986846, "grad_norm": 1160.4622802734375, "learning_rate": 4.048484848484849e-06, "loss": 201.7348, "step": 5010 }, { "epoch": 0.04152707118335608, "grad_norm": 1339.532470703125, "learning_rate": 4.056565656565657e-06, "loss": 296.6651, "step": 5020 }, { "epoch": 0.04160979443272532, "grad_norm": 1330.253173828125, "learning_rate": 4.064646464646465e-06, "loss": 222.7661, "step": 5030 }, { "epoch": 0.04169251768209455, "grad_norm": 1871.73095703125, "learning_rate": 4.072727272727273e-06, "loss": 185.6875, "step": 5040 }, { "epoch": 0.041775240931463786, "grad_norm": 989.5170288085938, "learning_rate": 4.080808080808081e-06, "loss": 184.853, "step": 5050 }, { "epoch": 0.04185796418083302, "grad_norm": 1325.712158203125, "learning_rate": 4.088888888888889e-06, "loss": 177.4656, "step": 5060 }, { "epoch": 0.04194068743020226, "grad_norm": 1084.450927734375, "learning_rate": 4.096969696969697e-06, "loss": 218.0059, "step": 5070 }, { "epoch": 0.04202341067957149, "grad_norm": 1230.160888671875, "learning_rate": 4.105050505050505e-06, "loss": 202.8811, "step": 5080 }, { "epoch": 0.04210613392894073, "grad_norm": 909.71728515625, "learning_rate": 4.113131313131313e-06, "loss": 229.2698, "step": 5090 }, { "epoch": 0.04218885717830997, "grad_norm": 2449.3154296875, "learning_rate": 4.1212121212121215e-06, "loss": 211.7621, "step": 5100 }, { "epoch": 0.0422715804276792, "grad_norm": 578.8762817382812, "learning_rate": 4.12929292929293e-06, "loss": 220.8765, "step": 5110 }, { "epoch": 0.042354303677048434, "grad_norm": 1294.6402587890625, "learning_rate": 4.1373737373737375e-06, "loss": 252.3842, "step": 5120 }, { "epoch": 0.04243702692641767, "grad_norm": 911.5681762695312, "learning_rate": 4.145454545454546e-06, "loss": 294.7006, "step": 5130 }, { "epoch": 0.04251975017578691, "grad_norm": 1328.2413330078125, "learning_rate": 4.1535353535353536e-06, "loss": 211.482, "step": 5140 }, { "epoch": 0.04260247342515614, "grad_norm": 1352.004638671875, "learning_rate": 4.161616161616162e-06, "loss": 210.5752, "step": 5150 }, { "epoch": 0.042685196674525375, "grad_norm": 1081.876953125, "learning_rate": 4.1696969696969705e-06, "loss": 224.4739, "step": 5160 }, { "epoch": 0.04276791992389461, "grad_norm": 2295.4033203125, "learning_rate": 4.177777777777778e-06, "loss": 229.6548, "step": 5170 }, { "epoch": 0.04285064317326385, "grad_norm": 1036.4749755859375, "learning_rate": 4.185858585858586e-06, "loss": 208.1364, "step": 5180 }, { "epoch": 0.04293336642263308, "grad_norm": 1558.9718017578125, "learning_rate": 4.193939393939394e-06, "loss": 262.7995, "step": 5190 }, { "epoch": 0.043016089672002315, "grad_norm": 1155.65478515625, "learning_rate": 4.2020202020202026e-06, "loss": 251.0647, "step": 5200 }, { "epoch": 0.04309881292137155, "grad_norm": 1081.0423583984375, "learning_rate": 4.21010101010101e-06, "loss": 235.0402, "step": 5210 }, { "epoch": 0.04318153617074079, "grad_norm": 725.3358764648438, "learning_rate": 4.218181818181819e-06, "loss": 310.5599, "step": 5220 }, { "epoch": 0.04326425942011002, "grad_norm": 1182.2801513671875, "learning_rate": 4.226262626262626e-06, "loss": 178.3826, "step": 5230 }, { "epoch": 0.043346982669479256, "grad_norm": 1413.02587890625, "learning_rate": 4.234343434343435e-06, "loss": 257.1767, "step": 5240 }, { "epoch": 0.04342970591884849, "grad_norm": 1222.1142578125, "learning_rate": 4.242424242424243e-06, "loss": 218.1251, "step": 5250 }, { "epoch": 0.04351242916821773, "grad_norm": 998.1055297851562, "learning_rate": 4.250505050505051e-06, "loss": 211.1936, "step": 5260 }, { "epoch": 0.04359515241758696, "grad_norm": 844.65478515625, "learning_rate": 4.258585858585859e-06, "loss": 235.3665, "step": 5270 }, { "epoch": 0.0436778756669562, "grad_norm": 691.6785278320312, "learning_rate": 4.266666666666668e-06, "loss": 173.3902, "step": 5280 }, { "epoch": 0.04376059891632543, "grad_norm": 1113.0621337890625, "learning_rate": 4.274747474747475e-06, "loss": 169.7838, "step": 5290 }, { "epoch": 0.04384332216569467, "grad_norm": 1622.1474609375, "learning_rate": 4.282828282828283e-06, "loss": 217.8731, "step": 5300 }, { "epoch": 0.043926045415063904, "grad_norm": 1753.8427734375, "learning_rate": 4.290909090909091e-06, "loss": 251.789, "step": 5310 }, { "epoch": 0.04400876866443314, "grad_norm": 918.1004638671875, "learning_rate": 4.298989898989899e-06, "loss": 201.8801, "step": 5320 }, { "epoch": 0.04409149191380238, "grad_norm": 899.498779296875, "learning_rate": 4.307070707070707e-06, "loss": 230.2721, "step": 5330 }, { "epoch": 0.04417421516317161, "grad_norm": 1194.7314453125, "learning_rate": 4.315151515151516e-06, "loss": 302.4559, "step": 5340 }, { "epoch": 0.044256938412540844, "grad_norm": 745.4715576171875, "learning_rate": 4.323232323232323e-06, "loss": 217.5794, "step": 5350 }, { "epoch": 0.04433966166191008, "grad_norm": 3024.5693359375, "learning_rate": 4.331313131313132e-06, "loss": 215.8062, "step": 5360 }, { "epoch": 0.04442238491127932, "grad_norm": 692.5128173828125, "learning_rate": 4.33939393939394e-06, "loss": 206.9033, "step": 5370 }, { "epoch": 0.04450510816064855, "grad_norm": 638.6357421875, "learning_rate": 4.347474747474748e-06, "loss": 277.6233, "step": 5380 }, { "epoch": 0.044587831410017785, "grad_norm": 1237.0350341796875, "learning_rate": 4.3555555555555555e-06, "loss": 202.0422, "step": 5390 }, { "epoch": 0.04467055465938702, "grad_norm": 1546.1956787109375, "learning_rate": 4.363636363636364e-06, "loss": 195.5879, "step": 5400 }, { "epoch": 0.04475327790875626, "grad_norm": 1744.332763671875, "learning_rate": 4.3717171717171715e-06, "loss": 308.2887, "step": 5410 }, { "epoch": 0.04483600115812549, "grad_norm": 1885.436767578125, "learning_rate": 4.37979797979798e-06, "loss": 257.4365, "step": 5420 }, { "epoch": 0.044918724407494726, "grad_norm": 1949.8758544921875, "learning_rate": 4.387878787878788e-06, "loss": 288.0396, "step": 5430 }, { "epoch": 0.04500144765686396, "grad_norm": 2583.996826171875, "learning_rate": 4.395959595959596e-06, "loss": 243.6479, "step": 5440 }, { "epoch": 0.0450841709062332, "grad_norm": 940.0198974609375, "learning_rate": 4.4040404040404044e-06, "loss": 209.4657, "step": 5450 }, { "epoch": 0.04516689415560243, "grad_norm": 897.3545532226562, "learning_rate": 4.412121212121213e-06, "loss": 244.724, "step": 5460 }, { "epoch": 0.045249617404971666, "grad_norm": 1344.112060546875, "learning_rate": 4.4202020202020205e-06, "loss": 210.6839, "step": 5470 }, { "epoch": 0.0453323406543409, "grad_norm": 2638.66357421875, "learning_rate": 4.428282828282829e-06, "loss": 281.9325, "step": 5480 }, { "epoch": 0.04541506390371014, "grad_norm": 1480.95068359375, "learning_rate": 4.436363636363637e-06, "loss": 227.8617, "step": 5490 }, { "epoch": 0.04549778715307937, "grad_norm": 1484.61962890625, "learning_rate": 4.444444444444444e-06, "loss": 307.6048, "step": 5500 }, { "epoch": 0.04558051040244861, "grad_norm": 921.8578491210938, "learning_rate": 4.452525252525253e-06, "loss": 283.7532, "step": 5510 }, { "epoch": 0.04566323365181784, "grad_norm": 1015.0638427734375, "learning_rate": 4.460606060606061e-06, "loss": 232.6935, "step": 5520 }, { "epoch": 0.04574595690118708, "grad_norm": 1154.900146484375, "learning_rate": 4.468686868686869e-06, "loss": 237.65, "step": 5530 }, { "epoch": 0.045828680150556314, "grad_norm": 1067.7034912109375, "learning_rate": 4.476767676767677e-06, "loss": 178.2383, "step": 5540 }, { "epoch": 0.04591140339992555, "grad_norm": 1582.29541015625, "learning_rate": 4.4848484848484855e-06, "loss": 208.2042, "step": 5550 }, { "epoch": 0.04599412664929478, "grad_norm": 1326.168212890625, "learning_rate": 4.492929292929293e-06, "loss": 206.2426, "step": 5560 }, { "epoch": 0.04607684989866402, "grad_norm": 1166.575927734375, "learning_rate": 4.501010101010102e-06, "loss": 246.9012, "step": 5570 }, { "epoch": 0.046159573148033255, "grad_norm": 1051.04345703125, "learning_rate": 4.50909090909091e-06, "loss": 220.1656, "step": 5580 }, { "epoch": 0.04624229639740249, "grad_norm": 994.1414794921875, "learning_rate": 4.517171717171718e-06, "loss": 246.7432, "step": 5590 }, { "epoch": 0.04632501964677173, "grad_norm": 1792.089111328125, "learning_rate": 4.525252525252526e-06, "loss": 195.5538, "step": 5600 }, { "epoch": 0.04640774289614096, "grad_norm": 889.2850341796875, "learning_rate": 4.533333333333334e-06, "loss": 205.7484, "step": 5610 }, { "epoch": 0.046490466145510195, "grad_norm": 986.1232299804688, "learning_rate": 4.541414141414141e-06, "loss": 259.9392, "step": 5620 }, { "epoch": 0.04657318939487943, "grad_norm": 1162.5496826171875, "learning_rate": 4.54949494949495e-06, "loss": 215.2503, "step": 5630 }, { "epoch": 0.04665591264424867, "grad_norm": 1568.9271240234375, "learning_rate": 4.557575757575758e-06, "loss": 198.5209, "step": 5640 }, { "epoch": 0.0467386358936179, "grad_norm": 1163.0565185546875, "learning_rate": 4.565656565656566e-06, "loss": 199.4977, "step": 5650 }, { "epoch": 0.046821359142987136, "grad_norm": 918.8221435546875, "learning_rate": 4.573737373737374e-06, "loss": 292.0952, "step": 5660 }, { "epoch": 0.04690408239235637, "grad_norm": 1420.076171875, "learning_rate": 4.581818181818183e-06, "loss": 273.1453, "step": 5670 }, { "epoch": 0.04698680564172561, "grad_norm": 751.3970947265625, "learning_rate": 4.58989898989899e-06, "loss": 227.5678, "step": 5680 }, { "epoch": 0.04706952889109484, "grad_norm": 4106.84814453125, "learning_rate": 4.597979797979799e-06, "loss": 257.4646, "step": 5690 }, { "epoch": 0.04715225214046408, "grad_norm": 1618.319091796875, "learning_rate": 4.606060606060606e-06, "loss": 242.2784, "step": 5700 }, { "epoch": 0.04723497538983331, "grad_norm": 1682.986083984375, "learning_rate": 4.614141414141414e-06, "loss": 270.8784, "step": 5710 }, { "epoch": 0.04731769863920255, "grad_norm": 2293.774658203125, "learning_rate": 4.622222222222222e-06, "loss": 310.2911, "step": 5720 }, { "epoch": 0.047400421888571784, "grad_norm": 896.1268920898438, "learning_rate": 4.630303030303031e-06, "loss": 262.9586, "step": 5730 }, { "epoch": 0.04748314513794102, "grad_norm": 1522.369384765625, "learning_rate": 4.6383838383838384e-06, "loss": 200.667, "step": 5740 }, { "epoch": 0.04756586838731025, "grad_norm": 1271.126220703125, "learning_rate": 4.646464646464647e-06, "loss": 211.7462, "step": 5750 }, { "epoch": 0.04764859163667949, "grad_norm": 1342.4813232421875, "learning_rate": 4.654545454545455e-06, "loss": 271.1495, "step": 5760 }, { "epoch": 0.047731314886048724, "grad_norm": 1042.1790771484375, "learning_rate": 4.662626262626263e-06, "loss": 207.4777, "step": 5770 }, { "epoch": 0.04781403813541796, "grad_norm": 1017.2898559570312, "learning_rate": 4.670707070707071e-06, "loss": 272.7402, "step": 5780 }, { "epoch": 0.04789676138478719, "grad_norm": 1240.869140625, "learning_rate": 4.678787878787879e-06, "loss": 224.074, "step": 5790 }, { "epoch": 0.04797948463415643, "grad_norm": 1895.0374755859375, "learning_rate": 4.6868686868686874e-06, "loss": 348.4789, "step": 5800 }, { "epoch": 0.048062207883525665, "grad_norm": 1665.503662109375, "learning_rate": 4.694949494949496e-06, "loss": 287.3858, "step": 5810 }, { "epoch": 0.0481449311328949, "grad_norm": 888.2938842773438, "learning_rate": 4.7030303030303035e-06, "loss": 186.7068, "step": 5820 }, { "epoch": 0.04822765438226414, "grad_norm": 1077.885009765625, "learning_rate": 4.711111111111111e-06, "loss": 188.8208, "step": 5830 }, { "epoch": 0.04831037763163337, "grad_norm": 1367.186279296875, "learning_rate": 4.7191919191919195e-06, "loss": 235.7021, "step": 5840 }, { "epoch": 0.048393100881002606, "grad_norm": 880.5682373046875, "learning_rate": 4.727272727272728e-06, "loss": 150.1969, "step": 5850 }, { "epoch": 0.04847582413037184, "grad_norm": 1563.840576171875, "learning_rate": 4.735353535353536e-06, "loss": 213.3092, "step": 5860 }, { "epoch": 0.04855854737974108, "grad_norm": 1928.1456298828125, "learning_rate": 4.743434343434344e-06, "loss": 258.8421, "step": 5870 }, { "epoch": 0.04864127062911031, "grad_norm": 1255.3079833984375, "learning_rate": 4.751515151515152e-06, "loss": 242.0784, "step": 5880 }, { "epoch": 0.048723993878479546, "grad_norm": 1542.299072265625, "learning_rate": 4.75959595959596e-06, "loss": 303.837, "step": 5890 }, { "epoch": 0.04880671712784878, "grad_norm": 2009.2406005859375, "learning_rate": 4.7676767676767685e-06, "loss": 155.5049, "step": 5900 }, { "epoch": 0.04888944037721802, "grad_norm": 1701.25830078125, "learning_rate": 4.775757575757576e-06, "loss": 289.9675, "step": 5910 }, { "epoch": 0.04897216362658725, "grad_norm": 1794.075439453125, "learning_rate": 4.783838383838385e-06, "loss": 206.6406, "step": 5920 }, { "epoch": 0.04905488687595649, "grad_norm": 2225.670654296875, "learning_rate": 4.791919191919192e-06, "loss": 241.3904, "step": 5930 }, { "epoch": 0.04913761012532572, "grad_norm": 1904.87939453125, "learning_rate": 4.800000000000001e-06, "loss": 245.3052, "step": 5940 }, { "epoch": 0.04922033337469496, "grad_norm": 1126.554931640625, "learning_rate": 4.808080808080808e-06, "loss": 188.594, "step": 5950 }, { "epoch": 0.049303056624064194, "grad_norm": 691.1046142578125, "learning_rate": 4.816161616161617e-06, "loss": 248.3712, "step": 5960 }, { "epoch": 0.04938577987343343, "grad_norm": 1169.880859375, "learning_rate": 4.824242424242424e-06, "loss": 245.4409, "step": 5970 }, { "epoch": 0.04946850312280266, "grad_norm": 1042.1029052734375, "learning_rate": 4.832323232323233e-06, "loss": 202.4587, "step": 5980 }, { "epoch": 0.0495512263721719, "grad_norm": 1452.813720703125, "learning_rate": 4.840404040404041e-06, "loss": 257.6912, "step": 5990 }, { "epoch": 0.049633949621541135, "grad_norm": 1247.24609375, "learning_rate": 4.848484848484849e-06, "loss": 210.8361, "step": 6000 }, { "epoch": 0.04971667287091037, "grad_norm": 1635.0625, "learning_rate": 4.856565656565657e-06, "loss": 241.0676, "step": 6010 }, { "epoch": 0.0497993961202796, "grad_norm": 1609.6522216796875, "learning_rate": 4.864646464646466e-06, "loss": 188.2872, "step": 6020 }, { "epoch": 0.04988211936964884, "grad_norm": 1085.433837890625, "learning_rate": 4.872727272727273e-06, "loss": 194.121, "step": 6030 }, { "epoch": 0.049964842619018075, "grad_norm": 1211.4505615234375, "learning_rate": 4.880808080808081e-06, "loss": 237.9093, "step": 6040 }, { "epoch": 0.05004756586838731, "grad_norm": 1017.5370483398438, "learning_rate": 4.888888888888889e-06, "loss": 298.4579, "step": 6050 }, { "epoch": 0.05013028911775654, "grad_norm": 1557.1414794921875, "learning_rate": 4.896969696969697e-06, "loss": 254.7002, "step": 6060 }, { "epoch": 0.05021301236712578, "grad_norm": 966.5237426757812, "learning_rate": 4.905050505050505e-06, "loss": 218.6844, "step": 6070 }, { "epoch": 0.050295735616495016, "grad_norm": 1119.049072265625, "learning_rate": 4.913131313131314e-06, "loss": 185.6555, "step": 6080 }, { "epoch": 0.05037845886586425, "grad_norm": 813.0082397460938, "learning_rate": 4.9212121212121214e-06, "loss": 193.8167, "step": 6090 }, { "epoch": 0.05046118211523349, "grad_norm": 1165.0540771484375, "learning_rate": 4.92929292929293e-06, "loss": 270.3936, "step": 6100 }, { "epoch": 0.05054390536460272, "grad_norm": 774.7178344726562, "learning_rate": 4.937373737373738e-06, "loss": 187.8722, "step": 6110 }, { "epoch": 0.050626628613971957, "grad_norm": 1326.367919921875, "learning_rate": 4.945454545454546e-06, "loss": 243.3256, "step": 6120 }, { "epoch": 0.05070935186334119, "grad_norm": 1486.3304443359375, "learning_rate": 4.953535353535354e-06, "loss": 186.0064, "step": 6130 }, { "epoch": 0.05079207511271043, "grad_norm": 1937.40234375, "learning_rate": 4.961616161616162e-06, "loss": 246.7979, "step": 6140 }, { "epoch": 0.050874798362079664, "grad_norm": 1517.21923828125, "learning_rate": 4.9696969696969696e-06, "loss": 231.4793, "step": 6150 }, { "epoch": 0.0509575216114489, "grad_norm": 5960.52197265625, "learning_rate": 4.977777777777778e-06, "loss": 197.895, "step": 6160 }, { "epoch": 0.05104024486081813, "grad_norm": 1942.2510986328125, "learning_rate": 4.9858585858585865e-06, "loss": 300.5423, "step": 6170 }, { "epoch": 0.05112296811018737, "grad_norm": 1276.1055908203125, "learning_rate": 4.993939393939394e-06, "loss": 288.2531, "step": 6180 }, { "epoch": 0.051205691359556604, "grad_norm": 2154.565673828125, "learning_rate": 5.0020202020202025e-06, "loss": 259.9872, "step": 6190 }, { "epoch": 0.05128841460892584, "grad_norm": 961.3928833007812, "learning_rate": 5.010101010101011e-06, "loss": 177.8174, "step": 6200 }, { "epoch": 0.05137113785829507, "grad_norm": 1438.574462890625, "learning_rate": 5.0181818181818186e-06, "loss": 197.5066, "step": 6210 }, { "epoch": 0.05145386110766431, "grad_norm": 766.4448852539062, "learning_rate": 5.026262626262627e-06, "loss": 257.3491, "step": 6220 }, { "epoch": 0.051536584357033545, "grad_norm": 793.5451049804688, "learning_rate": 5.034343434343435e-06, "loss": 184.9512, "step": 6230 }, { "epoch": 0.05161930760640278, "grad_norm": 869.6885375976562, "learning_rate": 5.042424242424243e-06, "loss": 253.1606, "step": 6240 }, { "epoch": 0.05170203085577201, "grad_norm": 1229.890869140625, "learning_rate": 5.0505050505050515e-06, "loss": 184.6765, "step": 6250 }, { "epoch": 0.05178475410514125, "grad_norm": 804.93994140625, "learning_rate": 5.058585858585859e-06, "loss": 162.1275, "step": 6260 }, { "epoch": 0.051867477354510486, "grad_norm": 1098.615478515625, "learning_rate": 5.0666666666666676e-06, "loss": 174.5565, "step": 6270 }, { "epoch": 0.05195020060387972, "grad_norm": 570.5386352539062, "learning_rate": 5.074747474747476e-06, "loss": 182.8037, "step": 6280 }, { "epoch": 0.05203292385324895, "grad_norm": 3363.004150390625, "learning_rate": 5.082828282828284e-06, "loss": 265.7634, "step": 6290 }, { "epoch": 0.05211564710261819, "grad_norm": 2040.7962646484375, "learning_rate": 5.090909090909091e-06, "loss": 240.1976, "step": 6300 }, { "epoch": 0.052198370351987426, "grad_norm": 1316.55908203125, "learning_rate": 5.098989898989899e-06, "loss": 249.6734, "step": 6310 }, { "epoch": 0.05228109360135666, "grad_norm": 2261.989013671875, "learning_rate": 5.107070707070707e-06, "loss": 208.5467, "step": 6320 }, { "epoch": 0.0523638168507259, "grad_norm": 1246.7120361328125, "learning_rate": 5.115151515151515e-06, "loss": 207.1795, "step": 6330 }, { "epoch": 0.05244654010009513, "grad_norm": 1235.8924560546875, "learning_rate": 5.123232323232323e-06, "loss": 231.5552, "step": 6340 }, { "epoch": 0.05252926334946437, "grad_norm": 805.1367797851562, "learning_rate": 5.131313131313132e-06, "loss": 217.3461, "step": 6350 }, { "epoch": 0.0526119865988336, "grad_norm": 902.8432006835938, "learning_rate": 5.139393939393939e-06, "loss": 250.8667, "step": 6360 }, { "epoch": 0.05269470984820284, "grad_norm": 1314.75634765625, "learning_rate": 5.147474747474748e-06, "loss": 188.1431, "step": 6370 }, { "epoch": 0.052777433097572074, "grad_norm": 1282.649169921875, "learning_rate": 5.155555555555556e-06, "loss": 198.1481, "step": 6380 }, { "epoch": 0.05286015634694131, "grad_norm": 1167.904052734375, "learning_rate": 5.163636363636364e-06, "loss": 192.307, "step": 6390 }, { "epoch": 0.05294287959631054, "grad_norm": 686.8750610351562, "learning_rate": 5.171717171717172e-06, "loss": 225.3385, "step": 6400 }, { "epoch": 0.05302560284567978, "grad_norm": 1144.7125244140625, "learning_rate": 5.17979797979798e-06, "loss": 188.2769, "step": 6410 }, { "epoch": 0.053108326095049015, "grad_norm": 1010.5237426757812, "learning_rate": 5.187878787878788e-06, "loss": 225.7106, "step": 6420 }, { "epoch": 0.05319104934441825, "grad_norm": 1336.3463134765625, "learning_rate": 5.195959595959597e-06, "loss": 208.7707, "step": 6430 }, { "epoch": 0.05327377259378748, "grad_norm": 1576.3480224609375, "learning_rate": 5.204040404040404e-06, "loss": 236.1072, "step": 6440 }, { "epoch": 0.05335649584315672, "grad_norm": 1445.076416015625, "learning_rate": 5.212121212121213e-06, "loss": 271.4749, "step": 6450 }, { "epoch": 0.053439219092525955, "grad_norm": 895.9810791015625, "learning_rate": 5.220202020202021e-06, "loss": 258.855, "step": 6460 }, { "epoch": 0.05352194234189519, "grad_norm": 952.8675537109375, "learning_rate": 5.228282828282829e-06, "loss": 175.3387, "step": 6470 }, { "epoch": 0.05360466559126442, "grad_norm": 1066.5716552734375, "learning_rate": 5.236363636363637e-06, "loss": 218.2292, "step": 6480 }, { "epoch": 0.05368738884063366, "grad_norm": 849.3695678710938, "learning_rate": 5.244444444444445e-06, "loss": 221.0137, "step": 6490 }, { "epoch": 0.053770112090002896, "grad_norm": 1675.2149658203125, "learning_rate": 5.252525252525253e-06, "loss": 223.2969, "step": 6500 }, { "epoch": 0.05385283533937213, "grad_norm": 1693.541748046875, "learning_rate": 5.26060606060606e-06, "loss": 229.6179, "step": 6510 }, { "epoch": 0.05393555858874136, "grad_norm": 1324.1651611328125, "learning_rate": 5.268686868686869e-06, "loss": 244.8779, "step": 6520 }, { "epoch": 0.0540182818381106, "grad_norm": 1403.1368408203125, "learning_rate": 5.276767676767677e-06, "loss": 268.8457, "step": 6530 }, { "epoch": 0.054101005087479836, "grad_norm": 1165.638671875, "learning_rate": 5.284848484848485e-06, "loss": 239.8147, "step": 6540 }, { "epoch": 0.05418372833684907, "grad_norm": 995.2945556640625, "learning_rate": 5.292929292929293e-06, "loss": 229.9961, "step": 6550 }, { "epoch": 0.0542664515862183, "grad_norm": 1505.5985107421875, "learning_rate": 5.3010101010101016e-06, "loss": 223.1589, "step": 6560 }, { "epoch": 0.054349174835587544, "grad_norm": 1667.1300048828125, "learning_rate": 5.309090909090909e-06, "loss": 217.6828, "step": 6570 }, { "epoch": 0.05443189808495678, "grad_norm": 1639.0364990234375, "learning_rate": 5.317171717171718e-06, "loss": 205.0005, "step": 6580 }, { "epoch": 0.05451462133432601, "grad_norm": 1414.21240234375, "learning_rate": 5.325252525252525e-06, "loss": 215.9356, "step": 6590 }, { "epoch": 0.05459734458369525, "grad_norm": 586.2081298828125, "learning_rate": 5.333333333333334e-06, "loss": 227.373, "step": 6600 }, { "epoch": 0.054680067833064484, "grad_norm": 2097.269775390625, "learning_rate": 5.341414141414142e-06, "loss": 234.4245, "step": 6610 }, { "epoch": 0.05476279108243372, "grad_norm": 1416.9189453125, "learning_rate": 5.34949494949495e-06, "loss": 262.4427, "step": 6620 }, { "epoch": 0.05484551433180295, "grad_norm": 1144.749755859375, "learning_rate": 5.357575757575758e-06, "loss": 201.3394, "step": 6630 }, { "epoch": 0.05492823758117219, "grad_norm": 1287.0030517578125, "learning_rate": 5.365656565656567e-06, "loss": 281.6339, "step": 6640 }, { "epoch": 0.055010960830541425, "grad_norm": 1555.04150390625, "learning_rate": 5.373737373737374e-06, "loss": 225.9727, "step": 6650 }, { "epoch": 0.05509368407991066, "grad_norm": 1129.1629638671875, "learning_rate": 5.381818181818183e-06, "loss": 159.6996, "step": 6660 }, { "epoch": 0.05517640732927989, "grad_norm": 1159.3424072265625, "learning_rate": 5.38989898989899e-06, "loss": 226.1631, "step": 6670 }, { "epoch": 0.05525913057864913, "grad_norm": 1065.8494873046875, "learning_rate": 5.397979797979799e-06, "loss": 213.2366, "step": 6680 }, { "epoch": 0.055341853828018366, "grad_norm": 801.6089477539062, "learning_rate": 5.406060606060607e-06, "loss": 180.9512, "step": 6690 }, { "epoch": 0.0554245770773876, "grad_norm": 1254.9515380859375, "learning_rate": 5.414141414141415e-06, "loss": 223.2975, "step": 6700 }, { "epoch": 0.05550730032675683, "grad_norm": 1734.849609375, "learning_rate": 5.422222222222223e-06, "loss": 179.4333, "step": 6710 }, { "epoch": 0.05559002357612607, "grad_norm": 1987.9564208984375, "learning_rate": 5.430303030303032e-06, "loss": 248.2462, "step": 6720 }, { "epoch": 0.055672746825495306, "grad_norm": 1181.2037353515625, "learning_rate": 5.438383838383838e-06, "loss": 182.878, "step": 6730 }, { "epoch": 0.05575547007486454, "grad_norm": 1901.66015625, "learning_rate": 5.446464646464647e-06, "loss": 251.8006, "step": 6740 }, { "epoch": 0.05583819332423377, "grad_norm": 632.8333740234375, "learning_rate": 5.4545454545454545e-06, "loss": 196.6042, "step": 6750 }, { "epoch": 0.05592091657360301, "grad_norm": 604.5985717773438, "learning_rate": 5.462626262626263e-06, "loss": 177.947, "step": 6760 }, { "epoch": 0.05600363982297225, "grad_norm": 1062.9954833984375, "learning_rate": 5.4707070707070705e-06, "loss": 312.2176, "step": 6770 }, { "epoch": 0.05608636307234148, "grad_norm": 1964.1070556640625, "learning_rate": 5.478787878787879e-06, "loss": 192.9068, "step": 6780 }, { "epoch": 0.056169086321710714, "grad_norm": 1671.50146484375, "learning_rate": 5.486868686868687e-06, "loss": 236.6793, "step": 6790 }, { "epoch": 0.056251809571079954, "grad_norm": 1363.7489013671875, "learning_rate": 5.494949494949495e-06, "loss": 207.5258, "step": 6800 }, { "epoch": 0.05633453282044919, "grad_norm": 1217.836669921875, "learning_rate": 5.5030303030303034e-06, "loss": 185.7013, "step": 6810 }, { "epoch": 0.05641725606981842, "grad_norm": 1585.8988037109375, "learning_rate": 5.511111111111112e-06, "loss": 217.5803, "step": 6820 }, { "epoch": 0.056499979319187654, "grad_norm": 983.3916625976562, "learning_rate": 5.5191919191919195e-06, "loss": 224.2803, "step": 6830 }, { "epoch": 0.056582702568556895, "grad_norm": 1328.7664794921875, "learning_rate": 5.527272727272728e-06, "loss": 213.4681, "step": 6840 }, { "epoch": 0.05666542581792613, "grad_norm": 908.5962524414062, "learning_rate": 5.5353535353535355e-06, "loss": 191.1099, "step": 6850 }, { "epoch": 0.05674814906729536, "grad_norm": 965.225341796875, "learning_rate": 5.543434343434344e-06, "loss": 180.1947, "step": 6860 }, { "epoch": 0.0568308723166646, "grad_norm": 1269.7061767578125, "learning_rate": 5.5515151515151524e-06, "loss": 235.1135, "step": 6870 }, { "epoch": 0.056913595566033835, "grad_norm": 1158.512939453125, "learning_rate": 5.55959595959596e-06, "loss": 215.2743, "step": 6880 }, { "epoch": 0.05699631881540307, "grad_norm": 1231.1844482421875, "learning_rate": 5.5676767676767685e-06, "loss": 327.6613, "step": 6890 }, { "epoch": 0.0570790420647723, "grad_norm": 1180.1192626953125, "learning_rate": 5.575757575757577e-06, "loss": 228.0458, "step": 6900 }, { "epoch": 0.05716176531414154, "grad_norm": 952.682861328125, "learning_rate": 5.5838383838383845e-06, "loss": 250.6257, "step": 6910 }, { "epoch": 0.057244488563510776, "grad_norm": 16013.0146484375, "learning_rate": 5.591919191919193e-06, "loss": 250.4285, "step": 6920 }, { "epoch": 0.05732721181288001, "grad_norm": 1266.31787109375, "learning_rate": 5.600000000000001e-06, "loss": 213.8897, "step": 6930 }, { "epoch": 0.05740993506224924, "grad_norm": 1832.4088134765625, "learning_rate": 5.608080808080808e-06, "loss": 218.5001, "step": 6940 }, { "epoch": 0.05749265831161848, "grad_norm": 1213.3984375, "learning_rate": 5.616161616161616e-06, "loss": 211.7139, "step": 6950 }, { "epoch": 0.057575381560987716, "grad_norm": 1369.9437255859375, "learning_rate": 5.624242424242424e-06, "loss": 192.9413, "step": 6960 }, { "epoch": 0.05765810481035695, "grad_norm": 1018.5819091796875, "learning_rate": 5.632323232323233e-06, "loss": 150.2073, "step": 6970 }, { "epoch": 0.05774082805972618, "grad_norm": 1435.1353759765625, "learning_rate": 5.64040404040404e-06, "loss": 187.5981, "step": 6980 }, { "epoch": 0.057823551309095424, "grad_norm": 1339.41650390625, "learning_rate": 5.648484848484849e-06, "loss": 165.7803, "step": 6990 }, { "epoch": 0.05790627455846466, "grad_norm": 3150.48974609375, "learning_rate": 5.656565656565657e-06, "loss": 183.762, "step": 7000 }, { "epoch": 0.05798899780783389, "grad_norm": 1491.4512939453125, "learning_rate": 5.664646464646465e-06, "loss": 205.812, "step": 7010 }, { "epoch": 0.058071721057203124, "grad_norm": 1045.4873046875, "learning_rate": 5.672727272727273e-06, "loss": 222.1431, "step": 7020 }, { "epoch": 0.058154444306572364, "grad_norm": 1604.70556640625, "learning_rate": 5.680808080808081e-06, "loss": 312.9387, "step": 7030 }, { "epoch": 0.0582371675559416, "grad_norm": 794.0164184570312, "learning_rate": 5.688888888888889e-06, "loss": 148.5473, "step": 7040 }, { "epoch": 0.05831989080531083, "grad_norm": 1710.36328125, "learning_rate": 5.696969696969698e-06, "loss": 187.5921, "step": 7050 }, { "epoch": 0.058402614054680065, "grad_norm": 1974.2613525390625, "learning_rate": 5.705050505050505e-06, "loss": 192.2352, "step": 7060 }, { "epoch": 0.058485337304049305, "grad_norm": 1533.34619140625, "learning_rate": 5.713131313131314e-06, "loss": 189.592, "step": 7070 }, { "epoch": 0.05856806055341854, "grad_norm": 1127.27392578125, "learning_rate": 5.721212121212122e-06, "loss": 211.6275, "step": 7080 }, { "epoch": 0.05865078380278777, "grad_norm": 1902.27001953125, "learning_rate": 5.72929292929293e-06, "loss": 189.9227, "step": 7090 }, { "epoch": 0.05873350705215701, "grad_norm": 1564.533203125, "learning_rate": 5.737373737373738e-06, "loss": 231.5597, "step": 7100 }, { "epoch": 0.058816230301526246, "grad_norm": 2943.590576171875, "learning_rate": 5.745454545454546e-06, "loss": 177.4852, "step": 7110 }, { "epoch": 0.05889895355089548, "grad_norm": 1309.1622314453125, "learning_rate": 5.753535353535354e-06, "loss": 165.6322, "step": 7120 }, { "epoch": 0.05898167680026471, "grad_norm": 1501.813720703125, "learning_rate": 5.761616161616163e-06, "loss": 189.5367, "step": 7130 }, { "epoch": 0.05906440004963395, "grad_norm": 1574.177978515625, "learning_rate": 5.76969696969697e-06, "loss": 234.3292, "step": 7140 }, { "epoch": 0.059147123299003186, "grad_norm": 1052.1170654296875, "learning_rate": 5.777777777777778e-06, "loss": 214.4601, "step": 7150 }, { "epoch": 0.05922984654837242, "grad_norm": 1018.6898803710938, "learning_rate": 5.785858585858586e-06, "loss": 156.9678, "step": 7160 }, { "epoch": 0.05931256979774165, "grad_norm": 1917.61865234375, "learning_rate": 5.793939393939394e-06, "loss": 233.6657, "step": 7170 }, { "epoch": 0.05939529304711089, "grad_norm": 1654.3441162109375, "learning_rate": 5.8020202020202025e-06, "loss": 173.1022, "step": 7180 }, { "epoch": 0.05947801629648013, "grad_norm": 1147.4951171875, "learning_rate": 5.81010101010101e-06, "loss": 209.7125, "step": 7190 }, { "epoch": 0.05956073954584936, "grad_norm": 1505.357177734375, "learning_rate": 5.8181818181818185e-06, "loss": 239.9892, "step": 7200 }, { "epoch": 0.059643462795218594, "grad_norm": 3524.7958984375, "learning_rate": 5.826262626262626e-06, "loss": 199.2161, "step": 7210 }, { "epoch": 0.059726186044587834, "grad_norm": 2917.4033203125, "learning_rate": 5.834343434343435e-06, "loss": 282.2665, "step": 7220 }, { "epoch": 0.05980890929395707, "grad_norm": 976.106689453125, "learning_rate": 5.842424242424243e-06, "loss": 220.0685, "step": 7230 }, { "epoch": 0.0598916325433263, "grad_norm": 1532.435791015625, "learning_rate": 5.850505050505051e-06, "loss": 247.5289, "step": 7240 }, { "epoch": 0.059974355792695534, "grad_norm": 868.5018920898438, "learning_rate": 5.858585858585859e-06, "loss": 204.2799, "step": 7250 }, { "epoch": 0.060057079042064775, "grad_norm": 1144.6495361328125, "learning_rate": 5.8666666666666675e-06, "loss": 233.1151, "step": 7260 }, { "epoch": 0.06013980229143401, "grad_norm": 1262.310302734375, "learning_rate": 5.874747474747475e-06, "loss": 215.0127, "step": 7270 }, { "epoch": 0.06022252554080324, "grad_norm": 1465.82568359375, "learning_rate": 5.882828282828284e-06, "loss": 214.6379, "step": 7280 }, { "epoch": 0.060305248790172475, "grad_norm": 890.1478271484375, "learning_rate": 5.890909090909091e-06, "loss": 212.3409, "step": 7290 }, { "epoch": 0.060387972039541715, "grad_norm": 804.8695068359375, "learning_rate": 5.8989898989899e-06, "loss": 161.0226, "step": 7300 }, { "epoch": 0.06047069528891095, "grad_norm": 1049.8092041015625, "learning_rate": 5.907070707070708e-06, "loss": 228.0541, "step": 7310 }, { "epoch": 0.06055341853828018, "grad_norm": 1314.708984375, "learning_rate": 5.915151515151516e-06, "loss": 169.0287, "step": 7320 }, { "epoch": 0.060636141787649415, "grad_norm": 877.56982421875, "learning_rate": 5.923232323232324e-06, "loss": 182.722, "step": 7330 }, { "epoch": 0.060718865037018656, "grad_norm": 1187.692138671875, "learning_rate": 5.9313131313131326e-06, "loss": 216.9126, "step": 7340 }, { "epoch": 0.06080158828638789, "grad_norm": 910.7957763671875, "learning_rate": 5.93939393939394e-06, "loss": 156.4217, "step": 7350 }, { "epoch": 0.06088431153575712, "grad_norm": 937.8931274414062, "learning_rate": 5.947474747474749e-06, "loss": 206.2811, "step": 7360 }, { "epoch": 0.06096703478512636, "grad_norm": 1068.356201171875, "learning_rate": 5.955555555555555e-06, "loss": 209.255, "step": 7370 }, { "epoch": 0.061049758034495596, "grad_norm": 1415.2979736328125, "learning_rate": 5.963636363636364e-06, "loss": 180.4899, "step": 7380 }, { "epoch": 0.06113248128386483, "grad_norm": 1216.490966796875, "learning_rate": 5.9717171717171714e-06, "loss": 181.2195, "step": 7390 }, { "epoch": 0.06121520453323406, "grad_norm": 1969.770751953125, "learning_rate": 5.97979797979798e-06, "loss": 185.2952, "step": 7400 }, { "epoch": 0.061297927782603304, "grad_norm": 1073.1572265625, "learning_rate": 5.987878787878788e-06, "loss": 194.6978, "step": 7410 }, { "epoch": 0.06138065103197254, "grad_norm": 1329.2269287109375, "learning_rate": 5.995959595959596e-06, "loss": 225.0991, "step": 7420 }, { "epoch": 0.06146337428134177, "grad_norm": 1447.0704345703125, "learning_rate": 6.004040404040404e-06, "loss": 222.2881, "step": 7430 }, { "epoch": 0.061546097530711004, "grad_norm": 1847.17578125, "learning_rate": 6.012121212121213e-06, "loss": 250.8386, "step": 7440 }, { "epoch": 0.061628820780080244, "grad_norm": 1011.4767456054688, "learning_rate": 6.0202020202020204e-06, "loss": 217.8489, "step": 7450 }, { "epoch": 0.06171154402944948, "grad_norm": 905.0313110351562, "learning_rate": 6.028282828282829e-06, "loss": 176.8829, "step": 7460 }, { "epoch": 0.06179426727881871, "grad_norm": 1627.3341064453125, "learning_rate": 6.0363636363636365e-06, "loss": 245.858, "step": 7470 }, { "epoch": 0.061876990528187945, "grad_norm": 1333.339111328125, "learning_rate": 6.044444444444445e-06, "loss": 172.512, "step": 7480 }, { "epoch": 0.061959713777557185, "grad_norm": 981.724609375, "learning_rate": 6.052525252525253e-06, "loss": 188.6218, "step": 7490 }, { "epoch": 0.06204243702692642, "grad_norm": 963.2952880859375, "learning_rate": 6.060606060606061e-06, "loss": 200.9197, "step": 7500 }, { "epoch": 0.06212516027629565, "grad_norm": 1976.6124267578125, "learning_rate": 6.068686868686869e-06, "loss": 206.6058, "step": 7510 }, { "epoch": 0.062207883525664885, "grad_norm": 964.7189331054688, "learning_rate": 6.076767676767678e-06, "loss": 244.5135, "step": 7520 }, { "epoch": 0.062290606775034126, "grad_norm": 940.2135009765625, "learning_rate": 6.0848484848484855e-06, "loss": 286.4893, "step": 7530 }, { "epoch": 0.06237333002440336, "grad_norm": 1086.095947265625, "learning_rate": 6.092929292929294e-06, "loss": 166.7001, "step": 7540 }, { "epoch": 0.06245605327377259, "grad_norm": 824.2728881835938, "learning_rate": 6.1010101010101015e-06, "loss": 178.9928, "step": 7550 }, { "epoch": 0.06253877652314183, "grad_norm": 1227.130859375, "learning_rate": 6.10909090909091e-06, "loss": 188.424, "step": 7560 }, { "epoch": 0.06262149977251107, "grad_norm": 943.979736328125, "learning_rate": 6.117171717171718e-06, "loss": 170.1881, "step": 7570 }, { "epoch": 0.06270422302188029, "grad_norm": 893.2919921875, "learning_rate": 6.125252525252525e-06, "loss": 203.6881, "step": 7580 }, { "epoch": 0.06278694627124953, "grad_norm": 1502.2542724609375, "learning_rate": 6.133333333333334e-06, "loss": 227.4439, "step": 7590 }, { "epoch": 0.06286966952061877, "grad_norm": 2379.823974609375, "learning_rate": 6.141414141414141e-06, "loss": 216.3211, "step": 7600 }, { "epoch": 0.062952392769988, "grad_norm": 2946.320068359375, "learning_rate": 6.14949494949495e-06, "loss": 181.0182, "step": 7610 }, { "epoch": 0.06303511601935724, "grad_norm": 1127.286865234375, "learning_rate": 6.157575757575758e-06, "loss": 219.9298, "step": 7620 }, { "epoch": 0.06311783926872648, "grad_norm": 2195.520751953125, "learning_rate": 6.165656565656566e-06, "loss": 225.9704, "step": 7630 }, { "epoch": 0.06320056251809571, "grad_norm": 1115.85107421875, "learning_rate": 6.173737373737374e-06, "loss": 192.5431, "step": 7640 }, { "epoch": 0.06328328576746495, "grad_norm": 1610.82861328125, "learning_rate": 6.181818181818182e-06, "loss": 234.5798, "step": 7650 }, { "epoch": 0.06336600901683419, "grad_norm": 2228.57080078125, "learning_rate": 6.18989898989899e-06, "loss": 203.8422, "step": 7660 }, { "epoch": 0.06344873226620341, "grad_norm": 1072.0853271484375, "learning_rate": 6.197979797979799e-06, "loss": 136.4943, "step": 7670 }, { "epoch": 0.06353145551557265, "grad_norm": 987.9502563476562, "learning_rate": 6.206060606060606e-06, "loss": 176.214, "step": 7680 }, { "epoch": 0.06361417876494188, "grad_norm": 1846.4237060546875, "learning_rate": 6.214141414141415e-06, "loss": 328.0838, "step": 7690 }, { "epoch": 0.06369690201431112, "grad_norm": 1142.6070556640625, "learning_rate": 6.222222222222223e-06, "loss": 197.7707, "step": 7700 }, { "epoch": 0.06377962526368036, "grad_norm": 2267.01904296875, "learning_rate": 6.230303030303031e-06, "loss": 246.8396, "step": 7710 }, { "epoch": 0.06386234851304959, "grad_norm": 1803.31591796875, "learning_rate": 6.238383838383839e-06, "loss": 156.6524, "step": 7720 }, { "epoch": 0.06394507176241883, "grad_norm": 1524.9769287109375, "learning_rate": 6.246464646464647e-06, "loss": 252.2839, "step": 7730 }, { "epoch": 0.06402779501178807, "grad_norm": 1911.2991943359375, "learning_rate": 6.254545454545455e-06, "loss": 185.8043, "step": 7740 }, { "epoch": 0.0641105182611573, "grad_norm": 1456.3634033203125, "learning_rate": 6.262626262626264e-06, "loss": 189.6922, "step": 7750 }, { "epoch": 0.06419324151052654, "grad_norm": 872.6129150390625, "learning_rate": 6.270707070707071e-06, "loss": 211.5073, "step": 7760 }, { "epoch": 0.06427596475989576, "grad_norm": 2161.432861328125, "learning_rate": 6.27878787878788e-06, "loss": 227.991, "step": 7770 }, { "epoch": 0.064358688009265, "grad_norm": 1427.72900390625, "learning_rate": 6.286868686868688e-06, "loss": 187.7627, "step": 7780 }, { "epoch": 0.06444141125863424, "grad_norm": 1048.0509033203125, "learning_rate": 6.294949494949495e-06, "loss": 216.9241, "step": 7790 }, { "epoch": 0.06452413450800347, "grad_norm": 831.1154174804688, "learning_rate": 6.303030303030303e-06, "loss": 156.4739, "step": 7800 }, { "epoch": 0.06460685775737271, "grad_norm": 1421.5413818359375, "learning_rate": 6.311111111111111e-06, "loss": 240.7841, "step": 7810 }, { "epoch": 0.06468958100674195, "grad_norm": 1609.0704345703125, "learning_rate": 6.3191919191919195e-06, "loss": 202.0292, "step": 7820 }, { "epoch": 0.06477230425611118, "grad_norm": 1056.16064453125, "learning_rate": 6.327272727272727e-06, "loss": 150.6357, "step": 7830 }, { "epoch": 0.06485502750548042, "grad_norm": 3895.295166015625, "learning_rate": 6.3353535353535355e-06, "loss": 166.0538, "step": 7840 }, { "epoch": 0.06493775075484966, "grad_norm": 1299.444580078125, "learning_rate": 6.343434343434344e-06, "loss": 160.5584, "step": 7850 }, { "epoch": 0.06502047400421888, "grad_norm": 712.9761352539062, "learning_rate": 6.3515151515151516e-06, "loss": 197.3979, "step": 7860 }, { "epoch": 0.06510319725358812, "grad_norm": 1037.716064453125, "learning_rate": 6.35959595959596e-06, "loss": 200.6918, "step": 7870 }, { "epoch": 0.06518592050295735, "grad_norm": 1494.057373046875, "learning_rate": 6.3676767676767685e-06, "loss": 238.1615, "step": 7880 }, { "epoch": 0.06526864375232659, "grad_norm": 1253.9837646484375, "learning_rate": 6.375757575757576e-06, "loss": 190.7486, "step": 7890 }, { "epoch": 0.06535136700169583, "grad_norm": 1362.12353515625, "learning_rate": 6.3838383838383845e-06, "loss": 214.7477, "step": 7900 }, { "epoch": 0.06543409025106506, "grad_norm": 890.4193725585938, "learning_rate": 6.391919191919192e-06, "loss": 170.1652, "step": 7910 }, { "epoch": 0.0655168135004343, "grad_norm": 1644.9490966796875, "learning_rate": 6.4000000000000006e-06, "loss": 175.0597, "step": 7920 }, { "epoch": 0.06559953674980354, "grad_norm": 777.9044799804688, "learning_rate": 6.408080808080809e-06, "loss": 238.5318, "step": 7930 }, { "epoch": 0.06568225999917277, "grad_norm": 1498.41455078125, "learning_rate": 6.416161616161617e-06, "loss": 183.5011, "step": 7940 }, { "epoch": 0.065764983248542, "grad_norm": 1392.91748046875, "learning_rate": 6.424242424242425e-06, "loss": 164.5937, "step": 7950 }, { "epoch": 0.06584770649791123, "grad_norm": 1324.7021484375, "learning_rate": 6.4323232323232335e-06, "loss": 230.2011, "step": 7960 }, { "epoch": 0.06593042974728047, "grad_norm": 1371.1768798828125, "learning_rate": 6.440404040404041e-06, "loss": 226.2801, "step": 7970 }, { "epoch": 0.06601315299664971, "grad_norm": 2229.455078125, "learning_rate": 6.4484848484848496e-06, "loss": 196.2276, "step": 7980 }, { "epoch": 0.06609587624601894, "grad_norm": 1298.6649169921875, "learning_rate": 6.456565656565658e-06, "loss": 211.2707, "step": 7990 }, { "epoch": 0.06617859949538818, "grad_norm": 784.8013305664062, "learning_rate": 6.464646464646466e-06, "loss": 207.776, "step": 8000 }, { "epoch": 0.06626132274475742, "grad_norm": 911.982666015625, "learning_rate": 6.472727272727272e-06, "loss": 162.3888, "step": 8010 }, { "epoch": 0.06634404599412665, "grad_norm": 1066.7635498046875, "learning_rate": 6.480808080808081e-06, "loss": 182.5109, "step": 8020 }, { "epoch": 0.06642676924349589, "grad_norm": 733.5493774414062, "learning_rate": 6.488888888888889e-06, "loss": 140.7046, "step": 8030 }, { "epoch": 0.06650949249286511, "grad_norm": 1074.85888671875, "learning_rate": 6.496969696969697e-06, "loss": 182.7351, "step": 8040 }, { "epoch": 0.06659221574223435, "grad_norm": 1245.099365234375, "learning_rate": 6.505050505050505e-06, "loss": 228.4951, "step": 8050 }, { "epoch": 0.0666749389916036, "grad_norm": 820.326171875, "learning_rate": 6.513131313131314e-06, "loss": 172.91, "step": 8060 }, { "epoch": 0.06675766224097282, "grad_norm": 1347.561279296875, "learning_rate": 6.521212121212121e-06, "loss": 175.6294, "step": 8070 }, { "epoch": 0.06684038549034206, "grad_norm": 2743.030517578125, "learning_rate": 6.52929292929293e-06, "loss": 240.5082, "step": 8080 }, { "epoch": 0.0669231087397113, "grad_norm": 1014.7551879882812, "learning_rate": 6.537373737373737e-06, "loss": 181.1287, "step": 8090 }, { "epoch": 0.06700583198908053, "grad_norm": 1014.5245971679688, "learning_rate": 6.545454545454546e-06, "loss": 189.7482, "step": 8100 }, { "epoch": 0.06708855523844977, "grad_norm": 778.7886962890625, "learning_rate": 6.553535353535354e-06, "loss": 170.7764, "step": 8110 }, { "epoch": 0.06717127848781901, "grad_norm": 2081.901611328125, "learning_rate": 6.561616161616162e-06, "loss": 245.8872, "step": 8120 }, { "epoch": 0.06725400173718823, "grad_norm": 1881.017333984375, "learning_rate": 6.56969696969697e-06, "loss": 252.341, "step": 8130 }, { "epoch": 0.06733672498655748, "grad_norm": 910.3214111328125, "learning_rate": 6.577777777777779e-06, "loss": 179.5906, "step": 8140 }, { "epoch": 0.0674194482359267, "grad_norm": 1163.5965576171875, "learning_rate": 6.585858585858586e-06, "loss": 233.8698, "step": 8150 }, { "epoch": 0.06750217148529594, "grad_norm": 884.6065673828125, "learning_rate": 6.593939393939395e-06, "loss": 139.5692, "step": 8160 }, { "epoch": 0.06758489473466518, "grad_norm": 1140.02392578125, "learning_rate": 6.602020202020203e-06, "loss": 195.1031, "step": 8170 }, { "epoch": 0.06766761798403441, "grad_norm": 1378.050537109375, "learning_rate": 6.610101010101011e-06, "loss": 215.5058, "step": 8180 }, { "epoch": 0.06775034123340365, "grad_norm": 1974.1138916015625, "learning_rate": 6.618181818181819e-06, "loss": 294.2934, "step": 8190 }, { "epoch": 0.06783306448277289, "grad_norm": 832.8023681640625, "learning_rate": 6.626262626262627e-06, "loss": 184.4631, "step": 8200 }, { "epoch": 0.06791578773214212, "grad_norm": 856.9111938476562, "learning_rate": 6.634343434343435e-06, "loss": 170.4937, "step": 8210 }, { "epoch": 0.06799851098151136, "grad_norm": 916.113525390625, "learning_rate": 6.642424242424242e-06, "loss": 170.3063, "step": 8220 }, { "epoch": 0.06808123423088058, "grad_norm": 1267.204345703125, "learning_rate": 6.650505050505051e-06, "loss": 154.5532, "step": 8230 }, { "epoch": 0.06816395748024982, "grad_norm": 1163.3558349609375, "learning_rate": 6.658585858585859e-06, "loss": 196.9229, "step": 8240 }, { "epoch": 0.06824668072961906, "grad_norm": 1783.79541015625, "learning_rate": 6.666666666666667e-06, "loss": 210.5392, "step": 8250 }, { "epoch": 0.06832940397898829, "grad_norm": 917.7781982421875, "learning_rate": 6.674747474747475e-06, "loss": 177.4379, "step": 8260 }, { "epoch": 0.06841212722835753, "grad_norm": 674.7391967773438, "learning_rate": 6.682828282828283e-06, "loss": 186.2342, "step": 8270 }, { "epoch": 0.06849485047772677, "grad_norm": 654.15234375, "learning_rate": 6.690909090909091e-06, "loss": 136.4612, "step": 8280 }, { "epoch": 0.068577573727096, "grad_norm": 1094.0560302734375, "learning_rate": 6.6989898989899e-06, "loss": 170.5708, "step": 8290 }, { "epoch": 0.06866029697646524, "grad_norm": 1474.585693359375, "learning_rate": 6.707070707070707e-06, "loss": 203.4748, "step": 8300 }, { "epoch": 0.06874302022583446, "grad_norm": 1227.391845703125, "learning_rate": 6.715151515151516e-06, "loss": 222.0174, "step": 8310 }, { "epoch": 0.0688257434752037, "grad_norm": 1838.2691650390625, "learning_rate": 6.723232323232324e-06, "loss": 224.6444, "step": 8320 }, { "epoch": 0.06890846672457294, "grad_norm": 1169.060791015625, "learning_rate": 6.731313131313132e-06, "loss": 142.1164, "step": 8330 }, { "epoch": 0.06899118997394217, "grad_norm": 547.6967163085938, "learning_rate": 6.73939393939394e-06, "loss": 153.1282, "step": 8340 }, { "epoch": 0.06907391322331141, "grad_norm": 1083.063720703125, "learning_rate": 6.747474747474749e-06, "loss": 184.0296, "step": 8350 }, { "epoch": 0.06915663647268065, "grad_norm": 5596.943359375, "learning_rate": 6.755555555555556e-06, "loss": 173.6666, "step": 8360 }, { "epoch": 0.06923935972204988, "grad_norm": 1326.8721923828125, "learning_rate": 6.763636363636365e-06, "loss": 190.1673, "step": 8370 }, { "epoch": 0.06932208297141912, "grad_norm": 1101.3780517578125, "learning_rate": 6.771717171717172e-06, "loss": 220.6135, "step": 8380 }, { "epoch": 0.06940480622078836, "grad_norm": 1435.279296875, "learning_rate": 6.779797979797981e-06, "loss": 178.5454, "step": 8390 }, { "epoch": 0.06948752947015759, "grad_norm": 1620.093994140625, "learning_rate": 6.787878787878789e-06, "loss": 151.146, "step": 8400 }, { "epoch": 0.06957025271952683, "grad_norm": 1524.4627685546875, "learning_rate": 6.795959595959597e-06, "loss": 185.1478, "step": 8410 }, { "epoch": 0.06965297596889605, "grad_norm": 1806.7978515625, "learning_rate": 6.804040404040405e-06, "loss": 173.0628, "step": 8420 }, { "epoch": 0.06973569921826529, "grad_norm": 1826.8575439453125, "learning_rate": 6.812121212121212e-06, "loss": 167.8523, "step": 8430 }, { "epoch": 0.06981842246763453, "grad_norm": 1353.9345703125, "learning_rate": 6.82020202020202e-06, "loss": 194.5669, "step": 8440 }, { "epoch": 0.06990114571700376, "grad_norm": 2063.500244140625, "learning_rate": 6.828282828282828e-06, "loss": 184.4123, "step": 8450 }, { "epoch": 0.069983868966373, "grad_norm": 1244.160400390625, "learning_rate": 6.8363636363636364e-06, "loss": 234.5416, "step": 8460 }, { "epoch": 0.07006659221574224, "grad_norm": 894.0686645507812, "learning_rate": 6.844444444444445e-06, "loss": 157.8413, "step": 8470 }, { "epoch": 0.07014931546511147, "grad_norm": 953.1651000976562, "learning_rate": 6.8525252525252525e-06, "loss": 187.5382, "step": 8480 }, { "epoch": 0.07023203871448071, "grad_norm": 868.2008056640625, "learning_rate": 6.860606060606061e-06, "loss": 153.3808, "step": 8490 }, { "epoch": 0.07031476196384993, "grad_norm": 1246.61328125, "learning_rate": 6.868686868686869e-06, "loss": 198.8839, "step": 8500 }, { "epoch": 0.07039748521321917, "grad_norm": 1278.021484375, "learning_rate": 6.876767676767677e-06, "loss": 162.5859, "step": 8510 }, { "epoch": 0.07048020846258841, "grad_norm": 1224.3677978515625, "learning_rate": 6.8848484848484854e-06, "loss": 216.0751, "step": 8520 }, { "epoch": 0.07056293171195764, "grad_norm": 629.5072021484375, "learning_rate": 6.892929292929294e-06, "loss": 219.3646, "step": 8530 }, { "epoch": 0.07064565496132688, "grad_norm": 873.804443359375, "learning_rate": 6.9010101010101015e-06, "loss": 176.4796, "step": 8540 }, { "epoch": 0.07072837821069612, "grad_norm": 1832.682373046875, "learning_rate": 6.90909090909091e-06, "loss": 256.3848, "step": 8550 }, { "epoch": 0.07081110146006535, "grad_norm": 1244.7239990234375, "learning_rate": 6.9171717171717175e-06, "loss": 215.3179, "step": 8560 }, { "epoch": 0.07089382470943459, "grad_norm": 938.3501586914062, "learning_rate": 6.925252525252526e-06, "loss": 169.5242, "step": 8570 }, { "epoch": 0.07097654795880382, "grad_norm": 2076.439697265625, "learning_rate": 6.9333333333333344e-06, "loss": 200.3409, "step": 8580 }, { "epoch": 0.07105927120817306, "grad_norm": 800.4453735351562, "learning_rate": 6.941414141414142e-06, "loss": 192.4127, "step": 8590 }, { "epoch": 0.0711419944575423, "grad_norm": 1373.4234619140625, "learning_rate": 6.9494949494949505e-06, "loss": 161.1228, "step": 8600 }, { "epoch": 0.07122471770691152, "grad_norm": 950.77197265625, "learning_rate": 6.957575757575759e-06, "loss": 179.3362, "step": 8610 }, { "epoch": 0.07130744095628076, "grad_norm": 1121.46044921875, "learning_rate": 6.9656565656565665e-06, "loss": 196.0917, "step": 8620 }, { "epoch": 0.07139016420565, "grad_norm": 906.2406005859375, "learning_rate": 6.973737373737375e-06, "loss": 139.1432, "step": 8630 }, { "epoch": 0.07147288745501923, "grad_norm": 2017.980712890625, "learning_rate": 6.981818181818183e-06, "loss": 212.8915, "step": 8640 }, { "epoch": 0.07155561070438847, "grad_norm": 1261.973388671875, "learning_rate": 6.98989898989899e-06, "loss": 200.444, "step": 8650 }, { "epoch": 0.07163833395375771, "grad_norm": 855.2569580078125, "learning_rate": 6.997979797979798e-06, "loss": 222.0111, "step": 8660 }, { "epoch": 0.07172105720312694, "grad_norm": 1921.39599609375, "learning_rate": 7.006060606060606e-06, "loss": 222.5992, "step": 8670 }, { "epoch": 0.07180378045249618, "grad_norm": 857.9965209960938, "learning_rate": 7.014141414141415e-06, "loss": 219.586, "step": 8680 }, { "epoch": 0.0718865037018654, "grad_norm": 915.5559692382812, "learning_rate": 7.022222222222222e-06, "loss": 184.9689, "step": 8690 }, { "epoch": 0.07196922695123464, "grad_norm": 1838.1370849609375, "learning_rate": 7.030303030303031e-06, "loss": 232.5889, "step": 8700 }, { "epoch": 0.07205195020060388, "grad_norm": 932.5780639648438, "learning_rate": 7.038383838383839e-06, "loss": 171.6244, "step": 8710 }, { "epoch": 0.07213467344997311, "grad_norm": 1225.2701416015625, "learning_rate": 7.046464646464647e-06, "loss": 153.8994, "step": 8720 }, { "epoch": 0.07221739669934235, "grad_norm": 1276.4774169921875, "learning_rate": 7.054545454545455e-06, "loss": 195.4201, "step": 8730 }, { "epoch": 0.07230011994871159, "grad_norm": 1169.0623779296875, "learning_rate": 7.062626262626263e-06, "loss": 184.115, "step": 8740 }, { "epoch": 0.07238284319808082, "grad_norm": 1720.584716796875, "learning_rate": 7.070707070707071e-06, "loss": 225.3465, "step": 8750 }, { "epoch": 0.07246556644745006, "grad_norm": 811.720947265625, "learning_rate": 7.07878787878788e-06, "loss": 155.7371, "step": 8760 }, { "epoch": 0.07254828969681928, "grad_norm": 1744.4354248046875, "learning_rate": 7.086868686868687e-06, "loss": 193.3227, "step": 8770 }, { "epoch": 0.07263101294618853, "grad_norm": 862.0523681640625, "learning_rate": 7.094949494949496e-06, "loss": 170.9686, "step": 8780 }, { "epoch": 0.07271373619555777, "grad_norm": 2165.270751953125, "learning_rate": 7.103030303030304e-06, "loss": 234.9279, "step": 8790 }, { "epoch": 0.07279645944492699, "grad_norm": 1295.5478515625, "learning_rate": 7.111111111111112e-06, "loss": 211.1977, "step": 8800 }, { "epoch": 0.07287918269429623, "grad_norm": 957.7193603515625, "learning_rate": 7.11919191919192e-06, "loss": 191.1015, "step": 8810 }, { "epoch": 0.07296190594366547, "grad_norm": 1650.5340576171875, "learning_rate": 7.127272727272728e-06, "loss": 164.5978, "step": 8820 }, { "epoch": 0.0730446291930347, "grad_norm": 1488.514404296875, "learning_rate": 7.135353535353536e-06, "loss": 187.6664, "step": 8830 }, { "epoch": 0.07312735244240394, "grad_norm": 1756.99853515625, "learning_rate": 7.143434343434345e-06, "loss": 219.4122, "step": 8840 }, { "epoch": 0.07321007569177317, "grad_norm": 924.37158203125, "learning_rate": 7.151515151515152e-06, "loss": 157.6518, "step": 8850 }, { "epoch": 0.0732927989411424, "grad_norm": 1157.555908203125, "learning_rate": 7.15959595959596e-06, "loss": 162.5851, "step": 8860 }, { "epoch": 0.07337552219051165, "grad_norm": 668.1375732421875, "learning_rate": 7.1676767676767676e-06, "loss": 153.8082, "step": 8870 }, { "epoch": 0.07345824543988087, "grad_norm": 1706.42333984375, "learning_rate": 7.175757575757576e-06, "loss": 145.3626, "step": 8880 }, { "epoch": 0.07354096868925011, "grad_norm": 2563.07861328125, "learning_rate": 7.1838383838383845e-06, "loss": 217.706, "step": 8890 }, { "epoch": 0.07362369193861935, "grad_norm": 1756.02392578125, "learning_rate": 7.191919191919192e-06, "loss": 177.5173, "step": 8900 }, { "epoch": 0.07370641518798858, "grad_norm": 1470.858642578125, "learning_rate": 7.2000000000000005e-06, "loss": 218.2071, "step": 8910 }, { "epoch": 0.07378913843735782, "grad_norm": 1429.1534423828125, "learning_rate": 7.208080808080808e-06, "loss": 192.6579, "step": 8920 }, { "epoch": 0.07387186168672706, "grad_norm": 1044.5311279296875, "learning_rate": 7.2161616161616166e-06, "loss": 175.2781, "step": 8930 }, { "epoch": 0.07395458493609629, "grad_norm": 1081.16748046875, "learning_rate": 7.224242424242425e-06, "loss": 195.337, "step": 8940 }, { "epoch": 0.07403730818546553, "grad_norm": 1816.50634765625, "learning_rate": 7.232323232323233e-06, "loss": 164.159, "step": 8950 }, { "epoch": 0.07412003143483475, "grad_norm": 941.095947265625, "learning_rate": 7.240404040404041e-06, "loss": 167.4316, "step": 8960 }, { "epoch": 0.074202754684204, "grad_norm": 1430.9918212890625, "learning_rate": 7.2484848484848495e-06, "loss": 142.7207, "step": 8970 }, { "epoch": 0.07428547793357324, "grad_norm": 987.8670043945312, "learning_rate": 7.256565656565657e-06, "loss": 185.2262, "step": 8980 }, { "epoch": 0.07436820118294246, "grad_norm": 1886.202392578125, "learning_rate": 7.2646464646464656e-06, "loss": 201.7182, "step": 8990 }, { "epoch": 0.0744509244323117, "grad_norm": 6469.64501953125, "learning_rate": 7.272727272727273e-06, "loss": 242.3246, "step": 9000 }, { "epoch": 0.07453364768168094, "grad_norm": 1446.23388671875, "learning_rate": 7.280808080808082e-06, "loss": 185.7719, "step": 9010 }, { "epoch": 0.07461637093105017, "grad_norm": 1158.1019287109375, "learning_rate": 7.28888888888889e-06, "loss": 135.7039, "step": 9020 }, { "epoch": 0.07469909418041941, "grad_norm": 1710.8179931640625, "learning_rate": 7.296969696969698e-06, "loss": 214.3662, "step": 9030 }, { "epoch": 0.07478181742978864, "grad_norm": 693.0316772460938, "learning_rate": 7.305050505050506e-06, "loss": 168.4758, "step": 9040 }, { "epoch": 0.07486454067915788, "grad_norm": 1307.99267578125, "learning_rate": 7.3131313131313146e-06, "loss": 221.4772, "step": 9050 }, { "epoch": 0.07494726392852712, "grad_norm": 578.5460815429688, "learning_rate": 7.321212121212122e-06, "loss": 172.2857, "step": 9060 }, { "epoch": 0.07502998717789634, "grad_norm": 4154.9052734375, "learning_rate": 7.32929292929293e-06, "loss": 200.6511, "step": 9070 }, { "epoch": 0.07511271042726558, "grad_norm": 3818.542236328125, "learning_rate": 7.337373737373737e-06, "loss": 198.6826, "step": 9080 }, { "epoch": 0.07519543367663482, "grad_norm": 630.0006713867188, "learning_rate": 7.345454545454546e-06, "loss": 146.3737, "step": 9090 }, { "epoch": 0.07527815692600405, "grad_norm": 1059.06640625, "learning_rate": 7.353535353535353e-06, "loss": 163.5028, "step": 9100 }, { "epoch": 0.07536088017537329, "grad_norm": 668.64111328125, "learning_rate": 7.361616161616162e-06, "loss": 170.1274, "step": 9110 }, { "epoch": 0.07544360342474253, "grad_norm": 1227.2437744140625, "learning_rate": 7.36969696969697e-06, "loss": 167.1523, "step": 9120 }, { "epoch": 0.07552632667411176, "grad_norm": 989.3958740234375, "learning_rate": 7.377777777777778e-06, "loss": 179.0444, "step": 9130 }, { "epoch": 0.075609049923481, "grad_norm": 2812.70166015625, "learning_rate": 7.385858585858586e-06, "loss": 170.3505, "step": 9140 }, { "epoch": 0.07569177317285022, "grad_norm": 1429.5191650390625, "learning_rate": 7.393939393939395e-06, "loss": 198.7005, "step": 9150 }, { "epoch": 0.07577449642221946, "grad_norm": 937.4029541015625, "learning_rate": 7.402020202020202e-06, "loss": 149.7339, "step": 9160 }, { "epoch": 0.0758572196715887, "grad_norm": 6428.83642578125, "learning_rate": 7.410101010101011e-06, "loss": 224.5054, "step": 9170 }, { "epoch": 0.07593994292095793, "grad_norm": 1135.309814453125, "learning_rate": 7.4181818181818185e-06, "loss": 206.6003, "step": 9180 }, { "epoch": 0.07602266617032717, "grad_norm": 663.953125, "learning_rate": 7.426262626262627e-06, "loss": 148.6399, "step": 9190 }, { "epoch": 0.07610538941969641, "grad_norm": 1465.2401123046875, "learning_rate": 7.434343434343435e-06, "loss": 129.7046, "step": 9200 }, { "epoch": 0.07618811266906564, "grad_norm": 840.3555297851562, "learning_rate": 7.442424242424243e-06, "loss": 212.8563, "step": 9210 }, { "epoch": 0.07627083591843488, "grad_norm": 1056.4595947265625, "learning_rate": 7.450505050505051e-06, "loss": 219.987, "step": 9220 }, { "epoch": 0.0763535591678041, "grad_norm": 1585.2525634765625, "learning_rate": 7.45858585858586e-06, "loss": 254.3914, "step": 9230 }, { "epoch": 0.07643628241717335, "grad_norm": 1167.71728515625, "learning_rate": 7.4666666666666675e-06, "loss": 173.4369, "step": 9240 }, { "epoch": 0.07651900566654259, "grad_norm": 1241.547119140625, "learning_rate": 7.474747474747476e-06, "loss": 196.5241, "step": 9250 }, { "epoch": 0.07660172891591181, "grad_norm": 1076.36279296875, "learning_rate": 7.4828282828282835e-06, "loss": 141.0298, "step": 9260 }, { "epoch": 0.07668445216528105, "grad_norm": 894.4515991210938, "learning_rate": 7.490909090909092e-06, "loss": 182.4499, "step": 9270 }, { "epoch": 0.0767671754146503, "grad_norm": 1318.6171875, "learning_rate": 7.4989898989899e-06, "loss": 294.3835, "step": 9280 }, { "epoch": 0.07684989866401952, "grad_norm": 1835.936279296875, "learning_rate": 7.507070707070707e-06, "loss": 200.7481, "step": 9290 }, { "epoch": 0.07693262191338876, "grad_norm": 1094.5806884765625, "learning_rate": 7.515151515151516e-06, "loss": 155.6739, "step": 9300 }, { "epoch": 0.07701534516275799, "grad_norm": 2438.4599609375, "learning_rate": 7.523232323232323e-06, "loss": 168.196, "step": 9310 }, { "epoch": 0.07709806841212723, "grad_norm": 1409.369384765625, "learning_rate": 7.531313131313132e-06, "loss": 168.5339, "step": 9320 }, { "epoch": 0.07718079166149647, "grad_norm": 792.833740234375, "learning_rate": 7.53939393939394e-06, "loss": 204.8236, "step": 9330 }, { "epoch": 0.0772635149108657, "grad_norm": 4106.85107421875, "learning_rate": 7.547474747474748e-06, "loss": 206.0253, "step": 9340 }, { "epoch": 0.07734623816023493, "grad_norm": 1158.9046630859375, "learning_rate": 7.555555555555556e-06, "loss": 200.0595, "step": 9350 }, { "epoch": 0.07742896140960417, "grad_norm": 829.5303344726562, "learning_rate": 7.563636363636364e-06, "loss": 152.1526, "step": 9360 }, { "epoch": 0.0775116846589734, "grad_norm": 1672.3824462890625, "learning_rate": 7.571717171717172e-06, "loss": 196.9728, "step": 9370 }, { "epoch": 0.07759440790834264, "grad_norm": 1697.9437255859375, "learning_rate": 7.579797979797981e-06, "loss": 140.7818, "step": 9380 }, { "epoch": 0.07767713115771188, "grad_norm": 1467.42138671875, "learning_rate": 7.587878787878788e-06, "loss": 237.8365, "step": 9390 }, { "epoch": 0.07775985440708111, "grad_norm": 1372.5516357421875, "learning_rate": 7.595959595959597e-06, "loss": 201.537, "step": 9400 }, { "epoch": 0.07784257765645035, "grad_norm": 1280.147216796875, "learning_rate": 7.604040404040405e-06, "loss": 168.6732, "step": 9410 }, { "epoch": 0.07792530090581958, "grad_norm": 1045.321533203125, "learning_rate": 7.612121212121213e-06, "loss": 154.092, "step": 9420 }, { "epoch": 0.07800802415518882, "grad_norm": 6423.11962890625, "learning_rate": 7.620202020202021e-06, "loss": 251.8613, "step": 9430 }, { "epoch": 0.07809074740455806, "grad_norm": 1650.4332275390625, "learning_rate": 7.628282828282829e-06, "loss": 218.2032, "step": 9440 }, { "epoch": 0.07817347065392728, "grad_norm": 1520.5958251953125, "learning_rate": 7.636363636363638e-06, "loss": 194.5603, "step": 9450 }, { "epoch": 0.07825619390329652, "grad_norm": 1989.30126953125, "learning_rate": 7.644444444444445e-06, "loss": 167.2613, "step": 9460 }, { "epoch": 0.07833891715266576, "grad_norm": 1363.1075439453125, "learning_rate": 7.652525252525253e-06, "loss": 193.0324, "step": 9470 }, { "epoch": 0.07842164040203499, "grad_norm": 1044.422607421875, "learning_rate": 7.660606060606062e-06, "loss": 239.5535, "step": 9480 }, { "epoch": 0.07850436365140423, "grad_norm": 1506.1378173828125, "learning_rate": 7.66868686868687e-06, "loss": 157.3207, "step": 9490 }, { "epoch": 0.07858708690077346, "grad_norm": 1023.1807250976562, "learning_rate": 7.676767676767677e-06, "loss": 177.0086, "step": 9500 }, { "epoch": 0.0786698101501427, "grad_norm": 766.3860473632812, "learning_rate": 7.684848484848485e-06, "loss": 180.8027, "step": 9510 }, { "epoch": 0.07875253339951194, "grad_norm": 1220.765380859375, "learning_rate": 7.692929292929294e-06, "loss": 199.764, "step": 9520 }, { "epoch": 0.07883525664888116, "grad_norm": 798.783447265625, "learning_rate": 7.7010101010101e-06, "loss": 178.7332, "step": 9530 }, { "epoch": 0.0789179798982504, "grad_norm": 1135.2913818359375, "learning_rate": 7.709090909090909e-06, "loss": 138.7885, "step": 9540 }, { "epoch": 0.07900070314761964, "grad_norm": 820.0831298828125, "learning_rate": 7.717171717171717e-06, "loss": 192.7479, "step": 9550 }, { "epoch": 0.07908342639698887, "grad_norm": 1487.1383056640625, "learning_rate": 7.725252525252526e-06, "loss": 202.4578, "step": 9560 }, { "epoch": 0.07916614964635811, "grad_norm": 3294.249267578125, "learning_rate": 7.733333333333334e-06, "loss": 154.1931, "step": 9570 }, { "epoch": 0.07924887289572734, "grad_norm": 1593.376708984375, "learning_rate": 7.741414141414141e-06, "loss": 220.1035, "step": 9580 }, { "epoch": 0.07933159614509658, "grad_norm": 1254.923095703125, "learning_rate": 7.74949494949495e-06, "loss": 162.3905, "step": 9590 }, { "epoch": 0.07941431939446582, "grad_norm": 989.010009765625, "learning_rate": 7.757575757575758e-06, "loss": 150.6632, "step": 9600 }, { "epoch": 0.07949704264383504, "grad_norm": 668.603759765625, "learning_rate": 7.765656565656566e-06, "loss": 180.0927, "step": 9610 }, { "epoch": 0.07957976589320429, "grad_norm": 918.044677734375, "learning_rate": 7.773737373737375e-06, "loss": 216.1173, "step": 9620 }, { "epoch": 0.07966248914257353, "grad_norm": 1150.803955078125, "learning_rate": 7.781818181818183e-06, "loss": 165.5561, "step": 9630 }, { "epoch": 0.07974521239194275, "grad_norm": 1339.1085205078125, "learning_rate": 7.78989898989899e-06, "loss": 120.4266, "step": 9640 }, { "epoch": 0.07982793564131199, "grad_norm": 1119.98388671875, "learning_rate": 7.797979797979799e-06, "loss": 186.139, "step": 9650 }, { "epoch": 0.07991065889068123, "grad_norm": 845.5322875976562, "learning_rate": 7.806060606060607e-06, "loss": 147.2671, "step": 9660 }, { "epoch": 0.07999338214005046, "grad_norm": 1525.1080322265625, "learning_rate": 7.814141414141415e-06, "loss": 227.4823, "step": 9670 }, { "epoch": 0.0800761053894197, "grad_norm": 936.9368286132812, "learning_rate": 7.822222222222224e-06, "loss": 147.8492, "step": 9680 }, { "epoch": 0.08015882863878893, "grad_norm": 1528.34375, "learning_rate": 7.83030303030303e-06, "loss": 176.4171, "step": 9690 }, { "epoch": 0.08024155188815817, "grad_norm": 1028.9813232421875, "learning_rate": 7.838383838383839e-06, "loss": 168.1568, "step": 9700 }, { "epoch": 0.0803242751375274, "grad_norm": 3313.676513671875, "learning_rate": 7.846464646464646e-06, "loss": 211.1931, "step": 9710 }, { "epoch": 0.08040699838689663, "grad_norm": 698.055419921875, "learning_rate": 7.854545454545454e-06, "loss": 147.0731, "step": 9720 }, { "epoch": 0.08048972163626587, "grad_norm": 1256.8193359375, "learning_rate": 7.862626262626263e-06, "loss": 141.3686, "step": 9730 }, { "epoch": 0.08057244488563511, "grad_norm": 1118.2659912109375, "learning_rate": 7.870707070707071e-06, "loss": 191.6792, "step": 9740 }, { "epoch": 0.08065516813500434, "grad_norm": 1615.0177001953125, "learning_rate": 7.87878787878788e-06, "loss": 171.2617, "step": 9750 }, { "epoch": 0.08073789138437358, "grad_norm": 1359.4404296875, "learning_rate": 7.886868686868686e-06, "loss": 233.8179, "step": 9760 }, { "epoch": 0.08082061463374281, "grad_norm": 1080.4814453125, "learning_rate": 7.894949494949495e-06, "loss": 167.5727, "step": 9770 }, { "epoch": 0.08090333788311205, "grad_norm": 1568.5997314453125, "learning_rate": 7.903030303030303e-06, "loss": 180.408, "step": 9780 }, { "epoch": 0.08098606113248129, "grad_norm": 1154.7166748046875, "learning_rate": 7.911111111111112e-06, "loss": 141.3988, "step": 9790 }, { "epoch": 0.08106878438185051, "grad_norm": 1465.5701904296875, "learning_rate": 7.91919191919192e-06, "loss": 235.8258, "step": 9800 }, { "epoch": 0.08115150763121975, "grad_norm": 1399.992919921875, "learning_rate": 7.927272727272729e-06, "loss": 197.8094, "step": 9810 }, { "epoch": 0.081234230880589, "grad_norm": 1609.233154296875, "learning_rate": 7.935353535353535e-06, "loss": 169.6143, "step": 9820 }, { "epoch": 0.08131695412995822, "grad_norm": 771.2155151367188, "learning_rate": 7.943434343434344e-06, "loss": 174.0228, "step": 9830 }, { "epoch": 0.08139967737932746, "grad_norm": 1397.8433837890625, "learning_rate": 7.951515151515152e-06, "loss": 195.8236, "step": 9840 }, { "epoch": 0.08148240062869669, "grad_norm": 1942.0560302734375, "learning_rate": 7.95959595959596e-06, "loss": 174.9514, "step": 9850 }, { "epoch": 0.08156512387806593, "grad_norm": 881.597900390625, "learning_rate": 7.96767676767677e-06, "loss": 155.3664, "step": 9860 }, { "epoch": 0.08164784712743517, "grad_norm": 982.1563110351562, "learning_rate": 7.975757575757576e-06, "loss": 166.2256, "step": 9870 }, { "epoch": 0.0817305703768044, "grad_norm": 1786.5938720703125, "learning_rate": 7.983838383838384e-06, "loss": 179.014, "step": 9880 }, { "epoch": 0.08181329362617364, "grad_norm": 1422.998046875, "learning_rate": 7.991919191919193e-06, "loss": 185.8182, "step": 9890 }, { "epoch": 0.08189601687554288, "grad_norm": 1009.948486328125, "learning_rate": 8.000000000000001e-06, "loss": 153.7416, "step": 9900 }, { "epoch": 0.0819787401249121, "grad_norm": 929.018798828125, "learning_rate": 8.00808080808081e-06, "loss": 178.9259, "step": 9910 }, { "epoch": 0.08206146337428134, "grad_norm": 654.3829345703125, "learning_rate": 8.016161616161617e-06, "loss": 130.9901, "step": 9920 }, { "epoch": 0.08214418662365058, "grad_norm": 1169.20068359375, "learning_rate": 8.024242424242425e-06, "loss": 125.0886, "step": 9930 }, { "epoch": 0.08222690987301981, "grad_norm": 1203.2630615234375, "learning_rate": 8.032323232323232e-06, "loss": 191.309, "step": 9940 }, { "epoch": 0.08230963312238905, "grad_norm": 1353.7027587890625, "learning_rate": 8.04040404040404e-06, "loss": 239.4487, "step": 9950 }, { "epoch": 0.08239235637175828, "grad_norm": 1199.4378662109375, "learning_rate": 8.048484848484849e-06, "loss": 173.5184, "step": 9960 }, { "epoch": 0.08247507962112752, "grad_norm": 1027.1614990234375, "learning_rate": 8.056565656565657e-06, "loss": 236.1041, "step": 9970 }, { "epoch": 0.08255780287049676, "grad_norm": 2071.078125, "learning_rate": 8.064646464646466e-06, "loss": 210.167, "step": 9980 }, { "epoch": 0.08264052611986598, "grad_norm": 931.7273559570312, "learning_rate": 8.072727272727274e-06, "loss": 169.0378, "step": 9990 }, { "epoch": 0.08272324936923522, "grad_norm": 1780.4796142578125, "learning_rate": 8.08080808080808e-06, "loss": 182.981, "step": 10000 }, { "epoch": 0.08280597261860446, "grad_norm": 1484.16845703125, "learning_rate": 8.08888888888889e-06, "loss": 190.9732, "step": 10010 }, { "epoch": 0.08288869586797369, "grad_norm": 1517.7740478515625, "learning_rate": 8.096969696969698e-06, "loss": 214.248, "step": 10020 }, { "epoch": 0.08297141911734293, "grad_norm": 4507.2001953125, "learning_rate": 8.105050505050506e-06, "loss": 149.1075, "step": 10030 }, { "epoch": 0.08305414236671216, "grad_norm": 1202.4232177734375, "learning_rate": 8.113131313131315e-06, "loss": 167.8956, "step": 10040 }, { "epoch": 0.0831368656160814, "grad_norm": 3145.95556640625, "learning_rate": 8.121212121212121e-06, "loss": 218.0878, "step": 10050 }, { "epoch": 0.08321958886545064, "grad_norm": 833.5780029296875, "learning_rate": 8.12929292929293e-06, "loss": 183.5963, "step": 10060 }, { "epoch": 0.08330231211481987, "grad_norm": 1200.9332275390625, "learning_rate": 8.137373737373738e-06, "loss": 200.6742, "step": 10070 }, { "epoch": 0.0833850353641891, "grad_norm": 2536.378662109375, "learning_rate": 8.145454545454547e-06, "loss": 244.585, "step": 10080 }, { "epoch": 0.08346775861355835, "grad_norm": 0.0, "learning_rate": 8.153535353535355e-06, "loss": 151.4828, "step": 10090 }, { "epoch": 0.08355048186292757, "grad_norm": 1108.849853515625, "learning_rate": 8.161616161616162e-06, "loss": 178.3399, "step": 10100 }, { "epoch": 0.08363320511229681, "grad_norm": 809.9747314453125, "learning_rate": 8.16969696969697e-06, "loss": 201.301, "step": 10110 }, { "epoch": 0.08371592836166604, "grad_norm": 1331.628173828125, "learning_rate": 8.177777777777779e-06, "loss": 141.3669, "step": 10120 }, { "epoch": 0.08379865161103528, "grad_norm": 990.9824829101562, "learning_rate": 8.185858585858587e-06, "loss": 169.3207, "step": 10130 }, { "epoch": 0.08388137486040452, "grad_norm": 1191.9073486328125, "learning_rate": 8.193939393939394e-06, "loss": 136.0156, "step": 10140 }, { "epoch": 0.08396409810977375, "grad_norm": 829.3964233398438, "learning_rate": 8.202020202020202e-06, "loss": 165.8068, "step": 10150 }, { "epoch": 0.08404682135914299, "grad_norm": 781.4263305664062, "learning_rate": 8.21010101010101e-06, "loss": 186.8697, "step": 10160 }, { "epoch": 0.08412954460851223, "grad_norm": 815.2606811523438, "learning_rate": 8.21818181818182e-06, "loss": 257.8631, "step": 10170 }, { "epoch": 0.08421226785788145, "grad_norm": 1860.0218505859375, "learning_rate": 8.226262626262626e-06, "loss": 187.696, "step": 10180 }, { "epoch": 0.0842949911072507, "grad_norm": 2099.6728515625, "learning_rate": 8.234343434343434e-06, "loss": 205.9569, "step": 10190 }, { "epoch": 0.08437771435661993, "grad_norm": 1526.5322265625, "learning_rate": 8.242424242424243e-06, "loss": 200.3001, "step": 10200 }, { "epoch": 0.08446043760598916, "grad_norm": 1122.8441162109375, "learning_rate": 8.250505050505051e-06, "loss": 132.1188, "step": 10210 }, { "epoch": 0.0845431608553584, "grad_norm": 952.8866577148438, "learning_rate": 8.25858585858586e-06, "loss": 180.2814, "step": 10220 }, { "epoch": 0.08462588410472763, "grad_norm": 1098.6842041015625, "learning_rate": 8.266666666666667e-06, "loss": 186.863, "step": 10230 }, { "epoch": 0.08470860735409687, "grad_norm": 1201.576904296875, "learning_rate": 8.274747474747475e-06, "loss": 166.658, "step": 10240 }, { "epoch": 0.08479133060346611, "grad_norm": 3628.583984375, "learning_rate": 8.282828282828283e-06, "loss": 280.9522, "step": 10250 }, { "epoch": 0.08487405385283533, "grad_norm": 956.4686279296875, "learning_rate": 8.290909090909092e-06, "loss": 177.9082, "step": 10260 }, { "epoch": 0.08495677710220458, "grad_norm": 1213.8653564453125, "learning_rate": 8.2989898989899e-06, "loss": 176.0302, "step": 10270 }, { "epoch": 0.08503950035157382, "grad_norm": 1263.095458984375, "learning_rate": 8.307070707070707e-06, "loss": 171.6865, "step": 10280 }, { "epoch": 0.08512222360094304, "grad_norm": 1094.4754638671875, "learning_rate": 8.315151515151516e-06, "loss": 158.2062, "step": 10290 }, { "epoch": 0.08520494685031228, "grad_norm": 957.9495239257812, "learning_rate": 8.323232323232324e-06, "loss": 169.5506, "step": 10300 }, { "epoch": 0.08528767009968151, "grad_norm": 1073.7796630859375, "learning_rate": 8.331313131313132e-06, "loss": 147.6098, "step": 10310 }, { "epoch": 0.08537039334905075, "grad_norm": 1453.0513916015625, "learning_rate": 8.339393939393941e-06, "loss": 189.7265, "step": 10320 }, { "epoch": 0.08545311659841999, "grad_norm": 791.720703125, "learning_rate": 8.34747474747475e-06, "loss": 214.0726, "step": 10330 }, { "epoch": 0.08553583984778922, "grad_norm": 1202.690673828125, "learning_rate": 8.355555555555556e-06, "loss": 197.4867, "step": 10340 }, { "epoch": 0.08561856309715846, "grad_norm": 1175.0545654296875, "learning_rate": 8.363636363636365e-06, "loss": 153.0685, "step": 10350 }, { "epoch": 0.0857012863465277, "grad_norm": 745.9451293945312, "learning_rate": 8.371717171717171e-06, "loss": 170.8535, "step": 10360 }, { "epoch": 0.08578400959589692, "grad_norm": 1357.7061767578125, "learning_rate": 8.37979797979798e-06, "loss": 162.8026, "step": 10370 }, { "epoch": 0.08586673284526616, "grad_norm": 1293.4013671875, "learning_rate": 8.387878787878788e-06, "loss": 152.5479, "step": 10380 }, { "epoch": 0.0859494560946354, "grad_norm": 1522.9151611328125, "learning_rate": 8.395959595959597e-06, "loss": 180.1763, "step": 10390 }, { "epoch": 0.08603217934400463, "grad_norm": 2097.494140625, "learning_rate": 8.404040404040405e-06, "loss": 168.4439, "step": 10400 }, { "epoch": 0.08611490259337387, "grad_norm": 1112.0291748046875, "learning_rate": 8.412121212121212e-06, "loss": 193.0542, "step": 10410 }, { "epoch": 0.0861976258427431, "grad_norm": 927.9266357421875, "learning_rate": 8.42020202020202e-06, "loss": 211.676, "step": 10420 }, { "epoch": 0.08628034909211234, "grad_norm": 1505.004150390625, "learning_rate": 8.428282828282829e-06, "loss": 203.2798, "step": 10430 }, { "epoch": 0.08636307234148158, "grad_norm": 1962.483154296875, "learning_rate": 8.436363636363637e-06, "loss": 217.3728, "step": 10440 }, { "epoch": 0.0864457955908508, "grad_norm": 2074.048583984375, "learning_rate": 8.444444444444446e-06, "loss": 224.9844, "step": 10450 }, { "epoch": 0.08652851884022005, "grad_norm": 1316.5950927734375, "learning_rate": 8.452525252525252e-06, "loss": 165.5474, "step": 10460 }, { "epoch": 0.08661124208958929, "grad_norm": 1096.7896728515625, "learning_rate": 8.460606060606061e-06, "loss": 138.2374, "step": 10470 }, { "epoch": 0.08669396533895851, "grad_norm": 1451.124755859375, "learning_rate": 8.46868686868687e-06, "loss": 141.8237, "step": 10480 }, { "epoch": 0.08677668858832775, "grad_norm": 1239.64697265625, "learning_rate": 8.476767676767678e-06, "loss": 156.3808, "step": 10490 }, { "epoch": 0.08685941183769698, "grad_norm": 689.178955078125, "learning_rate": 8.484848484848486e-06, "loss": 143.1666, "step": 10500 }, { "epoch": 0.08694213508706622, "grad_norm": 684.1348266601562, "learning_rate": 8.492929292929295e-06, "loss": 182.6362, "step": 10510 }, { "epoch": 0.08702485833643546, "grad_norm": 1299.8443603515625, "learning_rate": 8.501010101010101e-06, "loss": 160.0256, "step": 10520 }, { "epoch": 0.08710758158580469, "grad_norm": 845.9354858398438, "learning_rate": 8.50909090909091e-06, "loss": 184.4307, "step": 10530 }, { "epoch": 0.08719030483517393, "grad_norm": 1035.8701171875, "learning_rate": 8.517171717171718e-06, "loss": 228.6084, "step": 10540 }, { "epoch": 0.08727302808454317, "grad_norm": 668.9830932617188, "learning_rate": 8.525252525252527e-06, "loss": 200.758, "step": 10550 }, { "epoch": 0.0873557513339124, "grad_norm": 1058.86181640625, "learning_rate": 8.533333333333335e-06, "loss": 204.7663, "step": 10560 }, { "epoch": 0.08743847458328163, "grad_norm": 1521.737548828125, "learning_rate": 8.541414141414142e-06, "loss": 192.2928, "step": 10570 }, { "epoch": 0.08752119783265086, "grad_norm": 1117.1220703125, "learning_rate": 8.54949494949495e-06, "loss": 142.4138, "step": 10580 }, { "epoch": 0.0876039210820201, "grad_norm": 1011.63525390625, "learning_rate": 8.557575757575757e-06, "loss": 193.8034, "step": 10590 }, { "epoch": 0.08768664433138934, "grad_norm": 2114.847900390625, "learning_rate": 8.565656565656566e-06, "loss": 184.3933, "step": 10600 }, { "epoch": 0.08776936758075857, "grad_norm": 823.4564819335938, "learning_rate": 8.573737373737374e-06, "loss": 186.983, "step": 10610 }, { "epoch": 0.08785209083012781, "grad_norm": 1262.8677978515625, "learning_rate": 8.581818181818183e-06, "loss": 145.9637, "step": 10620 }, { "epoch": 0.08793481407949705, "grad_norm": 1084.4609375, "learning_rate": 8.589898989898991e-06, "loss": 199.8638, "step": 10630 }, { "epoch": 0.08801753732886627, "grad_norm": 1483.7017822265625, "learning_rate": 8.597979797979798e-06, "loss": 183.0729, "step": 10640 }, { "epoch": 0.08810026057823551, "grad_norm": 1132.1685791015625, "learning_rate": 8.606060606060606e-06, "loss": 160.7905, "step": 10650 }, { "epoch": 0.08818298382760476, "grad_norm": 1912.802978515625, "learning_rate": 8.614141414141415e-06, "loss": 235.4324, "step": 10660 }, { "epoch": 0.08826570707697398, "grad_norm": 1315.4351806640625, "learning_rate": 8.622222222222223e-06, "loss": 186.3054, "step": 10670 }, { "epoch": 0.08834843032634322, "grad_norm": 884.4146118164062, "learning_rate": 8.630303030303032e-06, "loss": 157.2276, "step": 10680 }, { "epoch": 0.08843115357571245, "grad_norm": 1087.9329833984375, "learning_rate": 8.63838383838384e-06, "loss": 154.3768, "step": 10690 }, { "epoch": 0.08851387682508169, "grad_norm": 913.88818359375, "learning_rate": 8.646464646464647e-06, "loss": 185.6447, "step": 10700 }, { "epoch": 0.08859660007445093, "grad_norm": 2494.311279296875, "learning_rate": 8.654545454545455e-06, "loss": 195.7933, "step": 10710 }, { "epoch": 0.08867932332382016, "grad_norm": 1353.1029052734375, "learning_rate": 8.662626262626264e-06, "loss": 179.2149, "step": 10720 }, { "epoch": 0.0887620465731894, "grad_norm": 1235.08056640625, "learning_rate": 8.670707070707072e-06, "loss": 140.4072, "step": 10730 }, { "epoch": 0.08884476982255864, "grad_norm": 1513.452880859375, "learning_rate": 8.67878787878788e-06, "loss": 188.1772, "step": 10740 }, { "epoch": 0.08892749307192786, "grad_norm": 576.0892333984375, "learning_rate": 8.686868686868687e-06, "loss": 149.6942, "step": 10750 }, { "epoch": 0.0890102163212971, "grad_norm": 1103.5166015625, "learning_rate": 8.694949494949496e-06, "loss": 165.9619, "step": 10760 }, { "epoch": 0.08909293957066633, "grad_norm": 964.4625244140625, "learning_rate": 8.703030303030304e-06, "loss": 138.5367, "step": 10770 }, { "epoch": 0.08917566282003557, "grad_norm": 1614.7874755859375, "learning_rate": 8.711111111111111e-06, "loss": 175.2976, "step": 10780 }, { "epoch": 0.08925838606940481, "grad_norm": 1648.361328125, "learning_rate": 8.71919191919192e-06, "loss": 187.3021, "step": 10790 }, { "epoch": 0.08934110931877404, "grad_norm": 1707.2655029296875, "learning_rate": 8.727272727272728e-06, "loss": 258.8253, "step": 10800 }, { "epoch": 0.08942383256814328, "grad_norm": 1160.01806640625, "learning_rate": 8.735353535353536e-06, "loss": 194.5743, "step": 10810 }, { "epoch": 0.08950655581751252, "grad_norm": 1146.39990234375, "learning_rate": 8.743434343434343e-06, "loss": 203.5324, "step": 10820 }, { "epoch": 0.08958927906688174, "grad_norm": 3626.928955078125, "learning_rate": 8.751515151515151e-06, "loss": 253.2095, "step": 10830 }, { "epoch": 0.08967200231625098, "grad_norm": 728.5478515625, "learning_rate": 8.75959595959596e-06, "loss": 145.7956, "step": 10840 }, { "epoch": 0.08975472556562021, "grad_norm": 1002.9689331054688, "learning_rate": 8.767676767676768e-06, "loss": 181.9694, "step": 10850 }, { "epoch": 0.08983744881498945, "grad_norm": 1103.3865966796875, "learning_rate": 8.775757575757577e-06, "loss": 177.4934, "step": 10860 }, { "epoch": 0.08992017206435869, "grad_norm": 1987.7679443359375, "learning_rate": 8.783838383838385e-06, "loss": 190.5272, "step": 10870 }, { "epoch": 0.09000289531372792, "grad_norm": 1918.5572509765625, "learning_rate": 8.791919191919192e-06, "loss": 176.4213, "step": 10880 }, { "epoch": 0.09008561856309716, "grad_norm": 979.5933837890625, "learning_rate": 8.8e-06, "loss": 208.7932, "step": 10890 }, { "epoch": 0.0901683418124664, "grad_norm": 1254.18603515625, "learning_rate": 8.808080808080809e-06, "loss": 179.7351, "step": 10900 }, { "epoch": 0.09025106506183563, "grad_norm": 1393.46044921875, "learning_rate": 8.816161616161617e-06, "loss": 239.5334, "step": 10910 }, { "epoch": 0.09033378831120487, "grad_norm": 932.0426025390625, "learning_rate": 8.824242424242426e-06, "loss": 159.2398, "step": 10920 }, { "epoch": 0.0904165115605741, "grad_norm": 1175.749755859375, "learning_rate": 8.832323232323233e-06, "loss": 196.0824, "step": 10930 }, { "epoch": 0.09049923480994333, "grad_norm": 669.9056396484375, "learning_rate": 8.840404040404041e-06, "loss": 195.3154, "step": 10940 }, { "epoch": 0.09058195805931257, "grad_norm": 2161.71826171875, "learning_rate": 8.84848484848485e-06, "loss": 201.2208, "step": 10950 }, { "epoch": 0.0906646813086818, "grad_norm": 1083.76318359375, "learning_rate": 8.856565656565658e-06, "loss": 181.3205, "step": 10960 }, { "epoch": 0.09074740455805104, "grad_norm": 1360.2738037109375, "learning_rate": 8.864646464646466e-06, "loss": 173.5026, "step": 10970 }, { "epoch": 0.09083012780742028, "grad_norm": 1274.3690185546875, "learning_rate": 8.872727272727275e-06, "loss": 191.3013, "step": 10980 }, { "epoch": 0.0909128510567895, "grad_norm": 1275.59814453125, "learning_rate": 8.880808080808082e-06, "loss": 180.379, "step": 10990 }, { "epoch": 0.09099557430615875, "grad_norm": 1908.28125, "learning_rate": 8.888888888888888e-06, "loss": 226.928, "step": 11000 }, { "epoch": 0.09107829755552799, "grad_norm": 1636.098388671875, "learning_rate": 8.896969696969697e-06, "loss": 166.5137, "step": 11010 }, { "epoch": 0.09116102080489721, "grad_norm": 1362.2508544921875, "learning_rate": 8.905050505050505e-06, "loss": 162.6263, "step": 11020 }, { "epoch": 0.09124374405426645, "grad_norm": 958.8033447265625, "learning_rate": 8.913131313131314e-06, "loss": 126.4212, "step": 11030 }, { "epoch": 0.09132646730363568, "grad_norm": 4022.8896484375, "learning_rate": 8.921212121212122e-06, "loss": 171.8223, "step": 11040 }, { "epoch": 0.09140919055300492, "grad_norm": 1437.0355224609375, "learning_rate": 8.92929292929293e-06, "loss": 150.3858, "step": 11050 }, { "epoch": 0.09149191380237416, "grad_norm": 1457.9029541015625, "learning_rate": 8.937373737373737e-06, "loss": 191.1608, "step": 11060 }, { "epoch": 0.09157463705174339, "grad_norm": 1079.6673583984375, "learning_rate": 8.945454545454546e-06, "loss": 169.316, "step": 11070 }, { "epoch": 0.09165736030111263, "grad_norm": 1028.742431640625, "learning_rate": 8.953535353535354e-06, "loss": 141.8136, "step": 11080 }, { "epoch": 0.09174008355048187, "grad_norm": 1539.916259765625, "learning_rate": 8.961616161616163e-06, "loss": 196.1172, "step": 11090 }, { "epoch": 0.0918228067998511, "grad_norm": 1310.006591796875, "learning_rate": 8.969696969696971e-06, "loss": 153.3139, "step": 11100 }, { "epoch": 0.09190553004922034, "grad_norm": 3632.826416015625, "learning_rate": 8.977777777777778e-06, "loss": 274.3921, "step": 11110 }, { "epoch": 0.09198825329858956, "grad_norm": 1360.5455322265625, "learning_rate": 8.985858585858586e-06, "loss": 199.4904, "step": 11120 }, { "epoch": 0.0920709765479588, "grad_norm": 1219.487548828125, "learning_rate": 8.993939393939395e-06, "loss": 138.8559, "step": 11130 }, { "epoch": 0.09215369979732804, "grad_norm": 1320.4920654296875, "learning_rate": 9.002020202020203e-06, "loss": 179.0958, "step": 11140 }, { "epoch": 0.09223642304669727, "grad_norm": 754.0531616210938, "learning_rate": 9.010101010101012e-06, "loss": 183.8919, "step": 11150 }, { "epoch": 0.09231914629606651, "grad_norm": 1132.8018798828125, "learning_rate": 9.01818181818182e-06, "loss": 138.0276, "step": 11160 }, { "epoch": 0.09240186954543575, "grad_norm": 1015.0302124023438, "learning_rate": 9.026262626262627e-06, "loss": 169.833, "step": 11170 }, { "epoch": 0.09248459279480498, "grad_norm": 1256.841552734375, "learning_rate": 9.034343434343435e-06, "loss": 192.5117, "step": 11180 }, { "epoch": 0.09256731604417422, "grad_norm": 637.6029663085938, "learning_rate": 9.042424242424244e-06, "loss": 136.4471, "step": 11190 }, { "epoch": 0.09265003929354346, "grad_norm": 2063.305419921875, "learning_rate": 9.050505050505052e-06, "loss": 187.3035, "step": 11200 }, { "epoch": 0.09273276254291268, "grad_norm": 893.4026489257812, "learning_rate": 9.058585858585859e-06, "loss": 197.6645, "step": 11210 }, { "epoch": 0.09281548579228192, "grad_norm": 11727.92578125, "learning_rate": 9.066666666666667e-06, "loss": 184.4137, "step": 11220 }, { "epoch": 0.09289820904165115, "grad_norm": 939.4194946289062, "learning_rate": 9.074747474747476e-06, "loss": 163.7697, "step": 11230 }, { "epoch": 0.09298093229102039, "grad_norm": 1211.7532958984375, "learning_rate": 9.082828282828283e-06, "loss": 150.1024, "step": 11240 }, { "epoch": 0.09306365554038963, "grad_norm": 1020.1021118164062, "learning_rate": 9.090909090909091e-06, "loss": 176.2725, "step": 11250 }, { "epoch": 0.09314637878975886, "grad_norm": 1597.582763671875, "learning_rate": 9.0989898989899e-06, "loss": 161.5863, "step": 11260 }, { "epoch": 0.0932291020391281, "grad_norm": 1205.4434814453125, "learning_rate": 9.107070707070708e-06, "loss": 191.3436, "step": 11270 }, { "epoch": 0.09331182528849734, "grad_norm": 1347.427001953125, "learning_rate": 9.115151515151516e-06, "loss": 147.056, "step": 11280 }, { "epoch": 0.09339454853786656, "grad_norm": 1340.0323486328125, "learning_rate": 9.123232323232323e-06, "loss": 166.0659, "step": 11290 }, { "epoch": 0.0934772717872358, "grad_norm": 1243.469482421875, "learning_rate": 9.131313131313132e-06, "loss": 160.2771, "step": 11300 }, { "epoch": 0.09355999503660503, "grad_norm": 1073.79638671875, "learning_rate": 9.13939393939394e-06, "loss": 189.4957, "step": 11310 }, { "epoch": 0.09364271828597427, "grad_norm": 1185.810546875, "learning_rate": 9.147474747474748e-06, "loss": 169.7764, "step": 11320 }, { "epoch": 0.09372544153534351, "grad_norm": 1002.9879760742188, "learning_rate": 9.155555555555557e-06, "loss": 117.5662, "step": 11330 }, { "epoch": 0.09380816478471274, "grad_norm": 1063.5313720703125, "learning_rate": 9.163636363636365e-06, "loss": 199.0167, "step": 11340 }, { "epoch": 0.09389088803408198, "grad_norm": 975.7847290039062, "learning_rate": 9.171717171717172e-06, "loss": 178.3596, "step": 11350 }, { "epoch": 0.09397361128345122, "grad_norm": 1078.6475830078125, "learning_rate": 9.17979797979798e-06, "loss": 167.8156, "step": 11360 }, { "epoch": 0.09405633453282045, "grad_norm": 2150.571044921875, "learning_rate": 9.187878787878789e-06, "loss": 151.6369, "step": 11370 }, { "epoch": 0.09413905778218969, "grad_norm": 1107.6488037109375, "learning_rate": 9.195959595959597e-06, "loss": 138.8753, "step": 11380 }, { "epoch": 0.09422178103155891, "grad_norm": 2013.7427978515625, "learning_rate": 9.204040404040406e-06, "loss": 168.8607, "step": 11390 }, { "epoch": 0.09430450428092815, "grad_norm": 926.5513916015625, "learning_rate": 9.212121212121213e-06, "loss": 155.2859, "step": 11400 }, { "epoch": 0.0943872275302974, "grad_norm": 640.766357421875, "learning_rate": 9.220202020202021e-06, "loss": 170.4931, "step": 11410 }, { "epoch": 0.09446995077966662, "grad_norm": 2521.883544921875, "learning_rate": 9.228282828282828e-06, "loss": 163.8456, "step": 11420 }, { "epoch": 0.09455267402903586, "grad_norm": 2574.5419921875, "learning_rate": 9.236363636363636e-06, "loss": 192.8077, "step": 11430 }, { "epoch": 0.0946353972784051, "grad_norm": 1514.116943359375, "learning_rate": 9.244444444444445e-06, "loss": 190.4653, "step": 11440 }, { "epoch": 0.09471812052777433, "grad_norm": 1116.5655517578125, "learning_rate": 9.252525252525253e-06, "loss": 150.5008, "step": 11450 }, { "epoch": 0.09480084377714357, "grad_norm": 7808.6015625, "learning_rate": 9.260606060606062e-06, "loss": 282.1488, "step": 11460 }, { "epoch": 0.09488356702651281, "grad_norm": 1106.9825439453125, "learning_rate": 9.268686868686868e-06, "loss": 195.7642, "step": 11470 }, { "epoch": 0.09496629027588203, "grad_norm": 3581.72900390625, "learning_rate": 9.276767676767677e-06, "loss": 179.7523, "step": 11480 }, { "epoch": 0.09504901352525127, "grad_norm": 1101.411865234375, "learning_rate": 9.284848484848485e-06, "loss": 199.7394, "step": 11490 }, { "epoch": 0.0951317367746205, "grad_norm": 1929.9052734375, "learning_rate": 9.292929292929294e-06, "loss": 174.8564, "step": 11500 }, { "epoch": 0.09521446002398974, "grad_norm": 1121.5028076171875, "learning_rate": 9.301010101010102e-06, "loss": 134.3296, "step": 11510 }, { "epoch": 0.09529718327335898, "grad_norm": 1080.38671875, "learning_rate": 9.30909090909091e-06, "loss": 173.9918, "step": 11520 }, { "epoch": 0.09537990652272821, "grad_norm": 1371.1961669921875, "learning_rate": 9.317171717171717e-06, "loss": 213.5175, "step": 11530 }, { "epoch": 0.09546262977209745, "grad_norm": 1480.9495849609375, "learning_rate": 9.325252525252526e-06, "loss": 136.4908, "step": 11540 }, { "epoch": 0.09554535302146669, "grad_norm": 912.1797485351562, "learning_rate": 9.333333333333334e-06, "loss": 215.026, "step": 11550 }, { "epoch": 0.09562807627083592, "grad_norm": 2468.00341796875, "learning_rate": 9.341414141414143e-06, "loss": 175.5986, "step": 11560 }, { "epoch": 0.09571079952020516, "grad_norm": 1296.7786865234375, "learning_rate": 9.349494949494951e-06, "loss": 209.1408, "step": 11570 }, { "epoch": 0.09579352276957438, "grad_norm": 1264.5654296875, "learning_rate": 9.357575757575758e-06, "loss": 200.8274, "step": 11580 }, { "epoch": 0.09587624601894362, "grad_norm": 1155.8309326171875, "learning_rate": 9.365656565656566e-06, "loss": 164.7913, "step": 11590 }, { "epoch": 0.09595896926831286, "grad_norm": 1020.3081665039062, "learning_rate": 9.373737373737375e-06, "loss": 155.3867, "step": 11600 }, { "epoch": 0.09604169251768209, "grad_norm": 1648.6768798828125, "learning_rate": 9.381818181818183e-06, "loss": 150.0429, "step": 11610 }, { "epoch": 0.09612441576705133, "grad_norm": 1153.5174560546875, "learning_rate": 9.389898989898992e-06, "loss": 183.8605, "step": 11620 }, { "epoch": 0.09620713901642057, "grad_norm": 1894.8492431640625, "learning_rate": 9.397979797979799e-06, "loss": 178.0279, "step": 11630 }, { "epoch": 0.0962898622657898, "grad_norm": 1127.750244140625, "learning_rate": 9.406060606060607e-06, "loss": 162.6299, "step": 11640 }, { "epoch": 0.09637258551515904, "grad_norm": 1268.74267578125, "learning_rate": 9.414141414141414e-06, "loss": 155.4865, "step": 11650 }, { "epoch": 0.09645530876452828, "grad_norm": 1007.6961669921875, "learning_rate": 9.422222222222222e-06, "loss": 209.233, "step": 11660 }, { "epoch": 0.0965380320138975, "grad_norm": 1457.4285888671875, "learning_rate": 9.43030303030303e-06, "loss": 163.3144, "step": 11670 }, { "epoch": 0.09662075526326674, "grad_norm": 1042.603515625, "learning_rate": 9.438383838383839e-06, "loss": 182.3341, "step": 11680 }, { "epoch": 0.09670347851263597, "grad_norm": 1047.1702880859375, "learning_rate": 9.446464646464648e-06, "loss": 167.2329, "step": 11690 }, { "epoch": 0.09678620176200521, "grad_norm": 1840.010009765625, "learning_rate": 9.454545454545456e-06, "loss": 204.4755, "step": 11700 }, { "epoch": 0.09686892501137445, "grad_norm": 573.9185180664062, "learning_rate": 9.462626262626263e-06, "loss": 106.0682, "step": 11710 }, { "epoch": 0.09695164826074368, "grad_norm": 1605.5093994140625, "learning_rate": 9.470707070707071e-06, "loss": 214.7734, "step": 11720 }, { "epoch": 0.09703437151011292, "grad_norm": 2460.1376953125, "learning_rate": 9.47878787878788e-06, "loss": 180.7382, "step": 11730 }, { "epoch": 0.09711709475948216, "grad_norm": 1100.57080078125, "learning_rate": 9.486868686868688e-06, "loss": 220.1118, "step": 11740 }, { "epoch": 0.09719981800885139, "grad_norm": 937.7584838867188, "learning_rate": 9.494949494949497e-06, "loss": 176.8332, "step": 11750 }, { "epoch": 0.09728254125822063, "grad_norm": 1534.452880859375, "learning_rate": 9.503030303030303e-06, "loss": 173.9935, "step": 11760 }, { "epoch": 0.09736526450758985, "grad_norm": 1466.6376953125, "learning_rate": 9.511111111111112e-06, "loss": 196.6087, "step": 11770 }, { "epoch": 0.09744798775695909, "grad_norm": 1731.87841796875, "learning_rate": 9.51919191919192e-06, "loss": 179.0696, "step": 11780 }, { "epoch": 0.09753071100632833, "grad_norm": 1235.0560302734375, "learning_rate": 9.527272727272729e-06, "loss": 164.9756, "step": 11790 }, { "epoch": 0.09761343425569756, "grad_norm": 662.07568359375, "learning_rate": 9.535353535353537e-06, "loss": 155.2859, "step": 11800 }, { "epoch": 0.0976961575050668, "grad_norm": 642.6212158203125, "learning_rate": 9.543434343434344e-06, "loss": 183.2646, "step": 11810 }, { "epoch": 0.09777888075443604, "grad_norm": 861.9931030273438, "learning_rate": 9.551515151515152e-06, "loss": 140.6058, "step": 11820 }, { "epoch": 0.09786160400380527, "grad_norm": 2064.848388671875, "learning_rate": 9.55959595959596e-06, "loss": 175.8456, "step": 11830 }, { "epoch": 0.0979443272531745, "grad_norm": 1402.7391357421875, "learning_rate": 9.56767676767677e-06, "loss": 168.1851, "step": 11840 }, { "epoch": 0.09802705050254373, "grad_norm": 1274.8916015625, "learning_rate": 9.575757575757576e-06, "loss": 177.3613, "step": 11850 }, { "epoch": 0.09810977375191297, "grad_norm": 1175.4793701171875, "learning_rate": 9.583838383838384e-06, "loss": 155.9946, "step": 11860 }, { "epoch": 0.09819249700128221, "grad_norm": 1113.3656005859375, "learning_rate": 9.591919191919193e-06, "loss": 174.7684, "step": 11870 }, { "epoch": 0.09827522025065144, "grad_norm": 1227.9544677734375, "learning_rate": 9.600000000000001e-06, "loss": 173.7199, "step": 11880 }, { "epoch": 0.09835794350002068, "grad_norm": 1131.5029296875, "learning_rate": 9.608080808080808e-06, "loss": 179.6841, "step": 11890 }, { "epoch": 0.09844066674938992, "grad_norm": 1879.9422607421875, "learning_rate": 9.616161616161616e-06, "loss": 201.8642, "step": 11900 }, { "epoch": 0.09852338999875915, "grad_norm": 1383.9271240234375, "learning_rate": 9.624242424242425e-06, "loss": 155.471, "step": 11910 }, { "epoch": 0.09860611324812839, "grad_norm": 1202.215576171875, "learning_rate": 9.632323232323233e-06, "loss": 176.2691, "step": 11920 }, { "epoch": 0.09868883649749763, "grad_norm": 713.47216796875, "learning_rate": 9.640404040404042e-06, "loss": 129.0435, "step": 11930 }, { "epoch": 0.09877155974686685, "grad_norm": 1676.49658203125, "learning_rate": 9.648484848484849e-06, "loss": 158.0141, "step": 11940 }, { "epoch": 0.0988542829962361, "grad_norm": 822.8709716796875, "learning_rate": 9.656565656565657e-06, "loss": 128.7683, "step": 11950 }, { "epoch": 0.09893700624560532, "grad_norm": 2505.596923828125, "learning_rate": 9.664646464646465e-06, "loss": 173.4101, "step": 11960 }, { "epoch": 0.09901972949497456, "grad_norm": 764.0625610351562, "learning_rate": 9.672727272727274e-06, "loss": 186.7536, "step": 11970 }, { "epoch": 0.0991024527443438, "grad_norm": 1434.5181884765625, "learning_rate": 9.680808080808082e-06, "loss": 195.2917, "step": 11980 }, { "epoch": 0.09918517599371303, "grad_norm": 916.0157470703125, "learning_rate": 9.688888888888889e-06, "loss": 151.0252, "step": 11990 }, { "epoch": 0.09926789924308227, "grad_norm": 1034.74072265625, "learning_rate": 9.696969696969698e-06, "loss": 163.2282, "step": 12000 }, { "epoch": 0.09935062249245151, "grad_norm": 1380.8863525390625, "learning_rate": 9.705050505050506e-06, "loss": 139.9053, "step": 12010 }, { "epoch": 0.09943334574182074, "grad_norm": 911.8162231445312, "learning_rate": 9.713131313131314e-06, "loss": 159.4434, "step": 12020 }, { "epoch": 0.09951606899118998, "grad_norm": 1710.5338134765625, "learning_rate": 9.721212121212123e-06, "loss": 152.8905, "step": 12030 }, { "epoch": 0.0995987922405592, "grad_norm": 1827.9671630859375, "learning_rate": 9.729292929292931e-06, "loss": 195.7561, "step": 12040 }, { "epoch": 0.09968151548992844, "grad_norm": 1594.53759765625, "learning_rate": 9.737373737373738e-06, "loss": 172.4959, "step": 12050 }, { "epoch": 0.09976423873929768, "grad_norm": 985.9189453125, "learning_rate": 9.745454545454547e-06, "loss": 167.3074, "step": 12060 }, { "epoch": 0.09984696198866691, "grad_norm": 1075.2579345703125, "learning_rate": 9.753535353535353e-06, "loss": 163.0599, "step": 12070 }, { "epoch": 0.09992968523803615, "grad_norm": 2030.679443359375, "learning_rate": 9.761616161616162e-06, "loss": 163.872, "step": 12080 }, { "epoch": 0.10001240848740539, "grad_norm": 1216.58984375, "learning_rate": 9.76969696969697e-06, "loss": 158.4474, "step": 12090 }, { "epoch": 0.10009513173677462, "grad_norm": 1842.97998046875, "learning_rate": 9.777777777777779e-06, "loss": 225.2362, "step": 12100 }, { "epoch": 0.10017785498614386, "grad_norm": 1359.1461181640625, "learning_rate": 9.785858585858587e-06, "loss": 172.9057, "step": 12110 }, { "epoch": 0.10026057823551308, "grad_norm": 1055.8148193359375, "learning_rate": 9.793939393939394e-06, "loss": 153.7417, "step": 12120 }, { "epoch": 0.10034330148488232, "grad_norm": 1045.6163330078125, "learning_rate": 9.802020202020202e-06, "loss": 151.8674, "step": 12130 }, { "epoch": 0.10042602473425156, "grad_norm": 702.3483276367188, "learning_rate": 9.81010101010101e-06, "loss": 154.9376, "step": 12140 }, { "epoch": 0.10050874798362079, "grad_norm": 582.2290649414062, "learning_rate": 9.81818181818182e-06, "loss": 191.1368, "step": 12150 }, { "epoch": 0.10059147123299003, "grad_norm": 1067.549072265625, "learning_rate": 9.826262626262628e-06, "loss": 215.4936, "step": 12160 }, { "epoch": 0.10067419448235927, "grad_norm": 1477.08251953125, "learning_rate": 9.834343434343434e-06, "loss": 179.9154, "step": 12170 }, { "epoch": 0.1007569177317285, "grad_norm": 1615.7445068359375, "learning_rate": 9.842424242424243e-06, "loss": 151.6433, "step": 12180 }, { "epoch": 0.10083964098109774, "grad_norm": 1283.7108154296875, "learning_rate": 9.850505050505051e-06, "loss": 176.6817, "step": 12190 }, { "epoch": 0.10092236423046698, "grad_norm": 1741.5172119140625, "learning_rate": 9.85858585858586e-06, "loss": 148.4978, "step": 12200 }, { "epoch": 0.1010050874798362, "grad_norm": 956.2932739257812, "learning_rate": 9.866666666666668e-06, "loss": 178.0466, "step": 12210 }, { "epoch": 0.10108781072920545, "grad_norm": 771.7984008789062, "learning_rate": 9.874747474747477e-06, "loss": 177.3261, "step": 12220 }, { "epoch": 0.10117053397857467, "grad_norm": 703.4427490234375, "learning_rate": 9.882828282828283e-06, "loss": 175.3549, "step": 12230 }, { "epoch": 0.10125325722794391, "grad_norm": 931.7167358398438, "learning_rate": 9.890909090909092e-06, "loss": 158.6873, "step": 12240 }, { "epoch": 0.10133598047731315, "grad_norm": 1080.5570068359375, "learning_rate": 9.8989898989899e-06, "loss": 109.4355, "step": 12250 }, { "epoch": 0.10141870372668238, "grad_norm": 1163.09423828125, "learning_rate": 9.907070707070709e-06, "loss": 164.6523, "step": 12260 }, { "epoch": 0.10150142697605162, "grad_norm": 870.4569091796875, "learning_rate": 9.915151515151515e-06, "loss": 162.8174, "step": 12270 }, { "epoch": 0.10158415022542086, "grad_norm": 976.0827026367188, "learning_rate": 9.923232323232324e-06, "loss": 149.6927, "step": 12280 }, { "epoch": 0.10166687347479009, "grad_norm": 881.8626098632812, "learning_rate": 9.931313131313132e-06, "loss": 128.7362, "step": 12290 }, { "epoch": 0.10174959672415933, "grad_norm": 612.5805053710938, "learning_rate": 9.939393939393939e-06, "loss": 129.0437, "step": 12300 }, { "epoch": 0.10183231997352855, "grad_norm": 1372.064453125, "learning_rate": 9.947474747474748e-06, "loss": 188.9469, "step": 12310 }, { "epoch": 0.1019150432228978, "grad_norm": 1295.59375, "learning_rate": 9.955555555555556e-06, "loss": 167.0848, "step": 12320 }, { "epoch": 0.10199776647226703, "grad_norm": 1037.228515625, "learning_rate": 9.963636363636364e-06, "loss": 176.8787, "step": 12330 }, { "epoch": 0.10208048972163626, "grad_norm": 1227.047607421875, "learning_rate": 9.971717171717173e-06, "loss": 198.2504, "step": 12340 }, { "epoch": 0.1021632129710055, "grad_norm": 1416.220458984375, "learning_rate": 9.97979797979798e-06, "loss": 179.9958, "step": 12350 }, { "epoch": 0.10224593622037474, "grad_norm": 1424.654052734375, "learning_rate": 9.987878787878788e-06, "loss": 191.8388, "step": 12360 }, { "epoch": 0.10232865946974397, "grad_norm": 1317.3236083984375, "learning_rate": 9.995959595959597e-06, "loss": 167.6917, "step": 12370 }, { "epoch": 0.10241138271911321, "grad_norm": 868.8121337890625, "learning_rate": 9.99999995027162e-06, "loss": 156.1878, "step": 12380 }, { "epoch": 0.10249410596848244, "grad_norm": 889.690185546875, "learning_rate": 9.99999955244457e-06, "loss": 188.0309, "step": 12390 }, { "epoch": 0.10257682921785168, "grad_norm": 964.9615478515625, "learning_rate": 9.999998756790503e-06, "loss": 162.0736, "step": 12400 }, { "epoch": 0.10265955246722092, "grad_norm": 2292.847900390625, "learning_rate": 9.999997563309483e-06, "loss": 138.4502, "step": 12410 }, { "epoch": 0.10274227571659014, "grad_norm": 1073.1864013671875, "learning_rate": 9.999995972001602e-06, "loss": 183.7101, "step": 12420 }, { "epoch": 0.10282499896595938, "grad_norm": 1411.09521484375, "learning_rate": 9.99999398286699e-06, "loss": 172.5208, "step": 12430 }, { "epoch": 0.10290772221532862, "grad_norm": 1048.4619140625, "learning_rate": 9.999991595905803e-06, "loss": 169.999, "step": 12440 }, { "epoch": 0.10299044546469785, "grad_norm": 986.3778686523438, "learning_rate": 9.999988811118232e-06, "loss": 146.7089, "step": 12450 }, { "epoch": 0.10307316871406709, "grad_norm": 795.2116088867188, "learning_rate": 9.999985628504498e-06, "loss": 159.4188, "step": 12460 }, { "epoch": 0.10315589196343633, "grad_norm": 919.0172119140625, "learning_rate": 9.999982048064854e-06, "loss": 144.6598, "step": 12470 }, { "epoch": 0.10323861521280556, "grad_norm": 1297.25732421875, "learning_rate": 9.999978069799585e-06, "loss": 171.9091, "step": 12480 }, { "epoch": 0.1033213384621748, "grad_norm": 1352.2923583984375, "learning_rate": 9.999973693709008e-06, "loss": 151.0232, "step": 12490 }, { "epoch": 0.10340406171154402, "grad_norm": 1723.31640625, "learning_rate": 9.99996891979347e-06, "loss": 131.0583, "step": 12500 }, { "epoch": 0.10348678496091326, "grad_norm": 1077.4090576171875, "learning_rate": 9.999963748053354e-06, "loss": 155.0065, "step": 12510 }, { "epoch": 0.1035695082102825, "grad_norm": 881.5154418945312, "learning_rate": 9.999958178489069e-06, "loss": 150.2538, "step": 12520 }, { "epoch": 0.10365223145965173, "grad_norm": 1021.1143188476562, "learning_rate": 9.999952211101056e-06, "loss": 154.2661, "step": 12530 }, { "epoch": 0.10373495470902097, "grad_norm": 1220.9776611328125, "learning_rate": 9.999945845889795e-06, "loss": 154.2683, "step": 12540 }, { "epoch": 0.10381767795839021, "grad_norm": 1148.5760498046875, "learning_rate": 9.999939082855788e-06, "loss": 137.0497, "step": 12550 }, { "epoch": 0.10390040120775944, "grad_norm": 1300.0975341796875, "learning_rate": 9.999931921999575e-06, "loss": 169.4536, "step": 12560 }, { "epoch": 0.10398312445712868, "grad_norm": 1183.8826904296875, "learning_rate": 9.999924363321726e-06, "loss": 166.6497, "step": 12570 }, { "epoch": 0.1040658477064979, "grad_norm": 718.7395629882812, "learning_rate": 9.999916406822843e-06, "loss": 109.411, "step": 12580 }, { "epoch": 0.10414857095586715, "grad_norm": 841.6725463867188, "learning_rate": 9.999908052503557e-06, "loss": 146.8344, "step": 12590 }, { "epoch": 0.10423129420523639, "grad_norm": 918.6251831054688, "learning_rate": 9.999899300364534e-06, "loss": 162.9566, "step": 12600 }, { "epoch": 0.10431401745460561, "grad_norm": 1467.325927734375, "learning_rate": 9.99989015040647e-06, "loss": 189.8623, "step": 12610 }, { "epoch": 0.10439674070397485, "grad_norm": 2947.025146484375, "learning_rate": 9.999880602630092e-06, "loss": 141.2983, "step": 12620 }, { "epoch": 0.10447946395334409, "grad_norm": 1483.4896240234375, "learning_rate": 9.999870657036161e-06, "loss": 162.2035, "step": 12630 }, { "epoch": 0.10456218720271332, "grad_norm": 1846.94580078125, "learning_rate": 9.99986031362547e-06, "loss": 150.0733, "step": 12640 }, { "epoch": 0.10464491045208256, "grad_norm": 1538.964111328125, "learning_rate": 9.99984957239884e-06, "loss": 206.5021, "step": 12650 }, { "epoch": 0.1047276337014518, "grad_norm": 1100.305908203125, "learning_rate": 9.999838433357124e-06, "loss": 197.57, "step": 12660 }, { "epoch": 0.10481035695082103, "grad_norm": 594.6061401367188, "learning_rate": 9.99982689650121e-06, "loss": 179.5414, "step": 12670 }, { "epoch": 0.10489308020019027, "grad_norm": 737.6015014648438, "learning_rate": 9.999814961832018e-06, "loss": 170.4644, "step": 12680 }, { "epoch": 0.1049758034495595, "grad_norm": 1346.4503173828125, "learning_rate": 9.999802629350492e-06, "loss": 202.0369, "step": 12690 }, { "epoch": 0.10505852669892873, "grad_norm": 2559.85546875, "learning_rate": 9.99978989905762e-06, "loss": 221.5045, "step": 12700 }, { "epoch": 0.10514124994829797, "grad_norm": 1314.4476318359375, "learning_rate": 9.999776770954411e-06, "loss": 173.8742, "step": 12710 }, { "epoch": 0.1052239731976672, "grad_norm": 3551.19677734375, "learning_rate": 9.99976324504191e-06, "loss": 149.5013, "step": 12720 }, { "epoch": 0.10530669644703644, "grad_norm": 1428.34814453125, "learning_rate": 9.999749321321192e-06, "loss": 251.0179, "step": 12730 }, { "epoch": 0.10538941969640568, "grad_norm": 753.5355224609375, "learning_rate": 9.999734999793369e-06, "loss": 163.6853, "step": 12740 }, { "epoch": 0.10547214294577491, "grad_norm": 1578.79541015625, "learning_rate": 9.999720280459576e-06, "loss": 154.6416, "step": 12750 }, { "epoch": 0.10555486619514415, "grad_norm": 1111.734130859375, "learning_rate": 9.999705163320987e-06, "loss": 177.3941, "step": 12760 }, { "epoch": 0.10563758944451337, "grad_norm": 1722.9654541015625, "learning_rate": 9.999689648378801e-06, "loss": 179.8888, "step": 12770 }, { "epoch": 0.10572031269388261, "grad_norm": 1038.706298828125, "learning_rate": 9.999673735634259e-06, "loss": 127.1906, "step": 12780 }, { "epoch": 0.10580303594325186, "grad_norm": 11962.8779296875, "learning_rate": 9.99965742508862e-06, "loss": 213.939, "step": 12790 }, { "epoch": 0.10588575919262108, "grad_norm": 1482.868408203125, "learning_rate": 9.999640716743186e-06, "loss": 133.2547, "step": 12800 }, { "epoch": 0.10596848244199032, "grad_norm": 3282.25390625, "learning_rate": 9.999623610599287e-06, "loss": 160.013, "step": 12810 }, { "epoch": 0.10605120569135956, "grad_norm": 1836.9112548828125, "learning_rate": 9.999606106658282e-06, "loss": 201.4363, "step": 12820 }, { "epoch": 0.10613392894072879, "grad_norm": 816.5084838867188, "learning_rate": 9.999588204921562e-06, "loss": 174.8686, "step": 12830 }, { "epoch": 0.10621665219009803, "grad_norm": 1352.0396728515625, "learning_rate": 9.999569905390556e-06, "loss": 167.4276, "step": 12840 }, { "epoch": 0.10629937543946726, "grad_norm": 533.4453125, "learning_rate": 9.999551208066716e-06, "loss": 100.8425, "step": 12850 }, { "epoch": 0.1063820986888365, "grad_norm": 1191.3555908203125, "learning_rate": 9.99953211295153e-06, "loss": 147.7438, "step": 12860 }, { "epoch": 0.10646482193820574, "grad_norm": 1642.895263671875, "learning_rate": 9.999512620046523e-06, "loss": 170.0533, "step": 12870 }, { "epoch": 0.10654754518757496, "grad_norm": 1362.4990234375, "learning_rate": 9.999492729353238e-06, "loss": 177.1871, "step": 12880 }, { "epoch": 0.1066302684369442, "grad_norm": 879.8888549804688, "learning_rate": 9.999472440873261e-06, "loss": 160.7778, "step": 12890 }, { "epoch": 0.10671299168631344, "grad_norm": 1243.377685546875, "learning_rate": 9.999451754608208e-06, "loss": 150.4305, "step": 12900 }, { "epoch": 0.10679571493568267, "grad_norm": 984.1234130859375, "learning_rate": 9.999430670559723e-06, "loss": 139.6723, "step": 12910 }, { "epoch": 0.10687843818505191, "grad_norm": 852.8709716796875, "learning_rate": 9.999409188729484e-06, "loss": 134.8206, "step": 12920 }, { "epoch": 0.10696116143442115, "grad_norm": 886.4750366210938, "learning_rate": 9.999387309119198e-06, "loss": 150.4601, "step": 12930 }, { "epoch": 0.10704388468379038, "grad_norm": 1050.5213623046875, "learning_rate": 9.999365031730609e-06, "loss": 162.7591, "step": 12940 }, { "epoch": 0.10712660793315962, "grad_norm": 1679.367919921875, "learning_rate": 9.99934235656549e-06, "loss": 146.1199, "step": 12950 }, { "epoch": 0.10720933118252884, "grad_norm": 1546.12939453125, "learning_rate": 9.999319283625641e-06, "loss": 207.9114, "step": 12960 }, { "epoch": 0.10729205443189808, "grad_norm": 856.313720703125, "learning_rate": 9.999295812912902e-06, "loss": 168.0912, "step": 12970 }, { "epoch": 0.10737477768126732, "grad_norm": 1466.169677734375, "learning_rate": 9.999271944429139e-06, "loss": 202.6795, "step": 12980 }, { "epoch": 0.10745750093063655, "grad_norm": 1105.8492431640625, "learning_rate": 9.99924767817625e-06, "loss": 139.1426, "step": 12990 }, { "epoch": 0.10754022418000579, "grad_norm": 1072.4610595703125, "learning_rate": 9.999223014156167e-06, "loss": 196.2584, "step": 13000 }, { "epoch": 0.10762294742937503, "grad_norm": 820.6765747070312, "learning_rate": 9.999197952370851e-06, "loss": 177.7073, "step": 13010 }, { "epoch": 0.10770567067874426, "grad_norm": 788.7813720703125, "learning_rate": 9.9991724928223e-06, "loss": 157.0522, "step": 13020 }, { "epoch": 0.1077883939281135, "grad_norm": 1179.322265625, "learning_rate": 9.999146635512535e-06, "loss": 189.2783, "step": 13030 }, { "epoch": 0.10787111717748273, "grad_norm": 1035.255615234375, "learning_rate": 9.999120380443614e-06, "loss": 145.2693, "step": 13040 }, { "epoch": 0.10795384042685197, "grad_norm": 1015.682861328125, "learning_rate": 9.99909372761763e-06, "loss": 143.6347, "step": 13050 }, { "epoch": 0.1080365636762212, "grad_norm": 2936.619873046875, "learning_rate": 9.9990666770367e-06, "loss": 179.5399, "step": 13060 }, { "epoch": 0.10811928692559043, "grad_norm": 1349.863037109375, "learning_rate": 9.999039228702975e-06, "loss": 176.5338, "step": 13070 }, { "epoch": 0.10820201017495967, "grad_norm": 1154.5147705078125, "learning_rate": 9.999011382618644e-06, "loss": 166.4444, "step": 13080 }, { "epoch": 0.10828473342432891, "grad_norm": 1495.1844482421875, "learning_rate": 9.998983138785919e-06, "loss": 149.649, "step": 13090 }, { "epoch": 0.10836745667369814, "grad_norm": 1207.58984375, "learning_rate": 9.998954497207045e-06, "loss": 149.596, "step": 13100 }, { "epoch": 0.10845017992306738, "grad_norm": 1018.6047973632812, "learning_rate": 9.998925457884307e-06, "loss": 154.7165, "step": 13110 }, { "epoch": 0.1085329031724366, "grad_norm": 1084.2874755859375, "learning_rate": 9.99889602082001e-06, "loss": 145.5709, "step": 13120 }, { "epoch": 0.10861562642180585, "grad_norm": 1162.242919921875, "learning_rate": 9.998866186016501e-06, "loss": 168.6215, "step": 13130 }, { "epoch": 0.10869834967117509, "grad_norm": 1217.2177734375, "learning_rate": 9.99883595347615e-06, "loss": 150.9769, "step": 13140 }, { "epoch": 0.10878107292054431, "grad_norm": 884.3536376953125, "learning_rate": 9.998805323201364e-06, "loss": 161.5837, "step": 13150 }, { "epoch": 0.10886379616991355, "grad_norm": 1069.2374267578125, "learning_rate": 9.998774295194579e-06, "loss": 167.3784, "step": 13160 }, { "epoch": 0.1089465194192828, "grad_norm": 749.6088256835938, "learning_rate": 9.998742869458264e-06, "loss": 115.7084, "step": 13170 }, { "epoch": 0.10902924266865202, "grad_norm": 703.9533081054688, "learning_rate": 9.998711045994922e-06, "loss": 162.0743, "step": 13180 }, { "epoch": 0.10911196591802126, "grad_norm": 1521.7666015625, "learning_rate": 9.998678824807082e-06, "loss": 189.686, "step": 13190 }, { "epoch": 0.1091946891673905, "grad_norm": 1742.9715576171875, "learning_rate": 9.99864620589731e-06, "loss": 163.9251, "step": 13200 }, { "epoch": 0.10927741241675973, "grad_norm": 999.13623046875, "learning_rate": 9.998613189268197e-06, "loss": 142.0809, "step": 13210 }, { "epoch": 0.10936013566612897, "grad_norm": 1409.1549072265625, "learning_rate": 9.998579774922377e-06, "loss": 149.1874, "step": 13220 }, { "epoch": 0.1094428589154982, "grad_norm": 675.3158569335938, "learning_rate": 9.998545962862503e-06, "loss": 117.2426, "step": 13230 }, { "epoch": 0.10952558216486744, "grad_norm": 1737.0997314453125, "learning_rate": 9.998511753091267e-06, "loss": 155.6872, "step": 13240 }, { "epoch": 0.10960830541423668, "grad_norm": 4492.49853515625, "learning_rate": 9.998477145611389e-06, "loss": 195.1054, "step": 13250 }, { "epoch": 0.1096910286636059, "grad_norm": 1431.2005615234375, "learning_rate": 9.998442140425625e-06, "loss": 189.3633, "step": 13260 }, { "epoch": 0.10977375191297514, "grad_norm": 1214.457763671875, "learning_rate": 9.998406737536761e-06, "loss": 167.9152, "step": 13270 }, { "epoch": 0.10985647516234438, "grad_norm": 553.1201171875, "learning_rate": 9.998370936947614e-06, "loss": 154.5592, "step": 13280 }, { "epoch": 0.10993919841171361, "grad_norm": 1713.546875, "learning_rate": 9.998334738661028e-06, "loss": 178.8647, "step": 13290 }, { "epoch": 0.11002192166108285, "grad_norm": 1338.5965576171875, "learning_rate": 9.998298142679888e-06, "loss": 209.809, "step": 13300 }, { "epoch": 0.11010464491045208, "grad_norm": 1447.3448486328125, "learning_rate": 9.998261149007104e-06, "loss": 151.2987, "step": 13310 }, { "epoch": 0.11018736815982132, "grad_norm": 1176.512939453125, "learning_rate": 9.998223757645618e-06, "loss": 154.0995, "step": 13320 }, { "epoch": 0.11027009140919056, "grad_norm": 1491.28466796875, "learning_rate": 9.998185968598407e-06, "loss": 172.6219, "step": 13330 }, { "epoch": 0.11035281465855978, "grad_norm": 872.3587036132812, "learning_rate": 9.998147781868477e-06, "loss": 136.3148, "step": 13340 }, { "epoch": 0.11043553790792902, "grad_norm": 1718.0472412109375, "learning_rate": 9.998109197458865e-06, "loss": 147.2434, "step": 13350 }, { "epoch": 0.11051826115729826, "grad_norm": 1724.40966796875, "learning_rate": 9.998070215372645e-06, "loss": 155.5677, "step": 13360 }, { "epoch": 0.11060098440666749, "grad_norm": 1238.861572265625, "learning_rate": 9.998030835612914e-06, "loss": 177.9599, "step": 13370 }, { "epoch": 0.11068370765603673, "grad_norm": 1163.747802734375, "learning_rate": 9.997991058182807e-06, "loss": 159.718, "step": 13380 }, { "epoch": 0.11076643090540596, "grad_norm": 1202.32763671875, "learning_rate": 9.997950883085492e-06, "loss": 161.9838, "step": 13390 }, { "epoch": 0.1108491541547752, "grad_norm": 1815.2415771484375, "learning_rate": 9.99791031032416e-06, "loss": 187.6479, "step": 13400 }, { "epoch": 0.11093187740414444, "grad_norm": 1177.880126953125, "learning_rate": 9.997869339902043e-06, "loss": 180.2671, "step": 13410 }, { "epoch": 0.11101460065351366, "grad_norm": 1133.1861572265625, "learning_rate": 9.9978279718224e-06, "loss": 161.116, "step": 13420 }, { "epoch": 0.1110973239028829, "grad_norm": 1010.6656494140625, "learning_rate": 9.99778620608852e-06, "loss": 178.742, "step": 13430 }, { "epoch": 0.11118004715225215, "grad_norm": 1107.10986328125, "learning_rate": 9.997744042703731e-06, "loss": 139.692, "step": 13440 }, { "epoch": 0.11126277040162137, "grad_norm": 1005.2382202148438, "learning_rate": 9.997701481671384e-06, "loss": 178.2121, "step": 13450 }, { "epoch": 0.11134549365099061, "grad_norm": 2284.540283203125, "learning_rate": 9.997658522994867e-06, "loss": 156.5188, "step": 13460 }, { "epoch": 0.11142821690035985, "grad_norm": 5658.24658203125, "learning_rate": 9.997615166677597e-06, "loss": 146.769, "step": 13470 }, { "epoch": 0.11151094014972908, "grad_norm": 457.560302734375, "learning_rate": 9.997571412723024e-06, "loss": 119.6845, "step": 13480 }, { "epoch": 0.11159366339909832, "grad_norm": 1155.69677734375, "learning_rate": 9.99752726113463e-06, "loss": 126.1926, "step": 13490 }, { "epoch": 0.11167638664846755, "grad_norm": 1057.7230224609375, "learning_rate": 9.997482711915926e-06, "loss": 145.3562, "step": 13500 }, { "epoch": 0.11175910989783679, "grad_norm": 1107.236328125, "learning_rate": 9.99743776507046e-06, "loss": 189.204, "step": 13510 }, { "epoch": 0.11184183314720603, "grad_norm": 1068.25634765625, "learning_rate": 9.997392420601804e-06, "loss": 135.3788, "step": 13520 }, { "epoch": 0.11192455639657525, "grad_norm": 875.785888671875, "learning_rate": 9.99734667851357e-06, "loss": 152.927, "step": 13530 }, { "epoch": 0.1120072796459445, "grad_norm": 1772.181396484375, "learning_rate": 9.997300538809394e-06, "loss": 222.4996, "step": 13540 }, { "epoch": 0.11209000289531373, "grad_norm": 1264.271240234375, "learning_rate": 9.99725400149295e-06, "loss": 145.0414, "step": 13550 }, { "epoch": 0.11217272614468296, "grad_norm": 2198.46630859375, "learning_rate": 9.997207066567939e-06, "loss": 194.5429, "step": 13560 }, { "epoch": 0.1122554493940522, "grad_norm": 1130.449951171875, "learning_rate": 9.997159734038096e-06, "loss": 166.257, "step": 13570 }, { "epoch": 0.11233817264342143, "grad_norm": 1828.01513671875, "learning_rate": 9.997112003907186e-06, "loss": 152.8911, "step": 13580 }, { "epoch": 0.11242089589279067, "grad_norm": 1196.669677734375, "learning_rate": 9.997063876179007e-06, "loss": 129.7313, "step": 13590 }, { "epoch": 0.11250361914215991, "grad_norm": 1254.476806640625, "learning_rate": 9.997015350857391e-06, "loss": 169.0213, "step": 13600 }, { "epoch": 0.11258634239152913, "grad_norm": 1430.6507568359375, "learning_rate": 9.996966427946195e-06, "loss": 150.1627, "step": 13610 }, { "epoch": 0.11266906564089837, "grad_norm": 906.8787231445312, "learning_rate": 9.996917107449313e-06, "loss": 174.3134, "step": 13620 }, { "epoch": 0.11275178889026762, "grad_norm": 812.057373046875, "learning_rate": 9.99686738937067e-06, "loss": 138.5191, "step": 13630 }, { "epoch": 0.11283451213963684, "grad_norm": 1879.1136474609375, "learning_rate": 9.996817273714222e-06, "loss": 174.5974, "step": 13640 }, { "epoch": 0.11291723538900608, "grad_norm": 945.3467407226562, "learning_rate": 9.996766760483955e-06, "loss": 161.973, "step": 13650 }, { "epoch": 0.11299995863837531, "grad_norm": 862.3170776367188, "learning_rate": 9.996715849683889e-06, "loss": 137.5633, "step": 13660 }, { "epoch": 0.11308268188774455, "grad_norm": 690.8417358398438, "learning_rate": 9.996664541318076e-06, "loss": 141.2179, "step": 13670 }, { "epoch": 0.11316540513711379, "grad_norm": 972.900634765625, "learning_rate": 9.996612835390596e-06, "loss": 115.8736, "step": 13680 }, { "epoch": 0.11324812838648302, "grad_norm": 1904.7318115234375, "learning_rate": 9.996560731905565e-06, "loss": 154.7887, "step": 13690 }, { "epoch": 0.11333085163585226, "grad_norm": 851.8038940429688, "learning_rate": 9.996508230867126e-06, "loss": 137.4024, "step": 13700 }, { "epoch": 0.1134135748852215, "grad_norm": 1190.90380859375, "learning_rate": 9.996455332279458e-06, "loss": 153.5202, "step": 13710 }, { "epoch": 0.11349629813459072, "grad_norm": 2241.255126953125, "learning_rate": 9.99640203614677e-06, "loss": 173.7966, "step": 13720 }, { "epoch": 0.11357902138395996, "grad_norm": 1084.945068359375, "learning_rate": 9.996348342473304e-06, "loss": 159.4762, "step": 13730 }, { "epoch": 0.1136617446333292, "grad_norm": 1753.5206298828125, "learning_rate": 9.99629425126333e-06, "loss": 218.3281, "step": 13740 }, { "epoch": 0.11374446788269843, "grad_norm": 802.15576171875, "learning_rate": 9.996239762521152e-06, "loss": 153.0149, "step": 13750 }, { "epoch": 0.11382719113206767, "grad_norm": 2257.760986328125, "learning_rate": 9.996184876251105e-06, "loss": 134.656, "step": 13760 }, { "epoch": 0.1139099143814369, "grad_norm": 1485.7083740234375, "learning_rate": 9.996129592457558e-06, "loss": 119.6472, "step": 13770 }, { "epoch": 0.11399263763080614, "grad_norm": 1323.489990234375, "learning_rate": 9.996073911144907e-06, "loss": 135.8627, "step": 13780 }, { "epoch": 0.11407536088017538, "grad_norm": 1439.948974609375, "learning_rate": 9.996017832317583e-06, "loss": 105.2017, "step": 13790 }, { "epoch": 0.1141580841295446, "grad_norm": 755.0234375, "learning_rate": 9.995961355980052e-06, "loss": 119.7319, "step": 13800 }, { "epoch": 0.11424080737891384, "grad_norm": 1265.96826171875, "learning_rate": 9.995904482136803e-06, "loss": 144.368, "step": 13810 }, { "epoch": 0.11432353062828308, "grad_norm": 1014.328857421875, "learning_rate": 9.99584721079236e-06, "loss": 180.5596, "step": 13820 }, { "epoch": 0.11440625387765231, "grad_norm": 1355.7919921875, "learning_rate": 9.995789541951287e-06, "loss": 169.1609, "step": 13830 }, { "epoch": 0.11448897712702155, "grad_norm": 1709.610107421875, "learning_rate": 9.995731475618163e-06, "loss": 152.0147, "step": 13840 }, { "epoch": 0.11457170037639078, "grad_norm": 1426.723388671875, "learning_rate": 9.995673011797615e-06, "loss": 142.7122, "step": 13850 }, { "epoch": 0.11465442362576002, "grad_norm": 3322.473876953125, "learning_rate": 9.995614150494293e-06, "loss": 192.3159, "step": 13860 }, { "epoch": 0.11473714687512926, "grad_norm": 852.6958618164062, "learning_rate": 9.995554891712879e-06, "loss": 221.4455, "step": 13870 }, { "epoch": 0.11481987012449849, "grad_norm": 679.8392944335938, "learning_rate": 9.995495235458087e-06, "loss": 205.1969, "step": 13880 }, { "epoch": 0.11490259337386773, "grad_norm": 1818.237548828125, "learning_rate": 9.99543518173467e-06, "loss": 157.8227, "step": 13890 }, { "epoch": 0.11498531662323697, "grad_norm": 911.2511596679688, "learning_rate": 9.995374730547397e-06, "loss": 213.6541, "step": 13900 }, { "epoch": 0.11506803987260619, "grad_norm": 1822.821044921875, "learning_rate": 9.995313881901085e-06, "loss": 198.7188, "step": 13910 }, { "epoch": 0.11515076312197543, "grad_norm": 653.8102416992188, "learning_rate": 9.995252635800572e-06, "loss": 127.7723, "step": 13920 }, { "epoch": 0.11523348637134467, "grad_norm": 1713.5816650390625, "learning_rate": 9.995190992250732e-06, "loss": 225.4239, "step": 13930 }, { "epoch": 0.1153162096207139, "grad_norm": 1139.3336181640625, "learning_rate": 9.995128951256469e-06, "loss": 140.1807, "step": 13940 }, { "epoch": 0.11539893287008314, "grad_norm": 1414.932861328125, "learning_rate": 9.99506651282272e-06, "loss": 161.69, "step": 13950 }, { "epoch": 0.11548165611945237, "grad_norm": 1176.623291015625, "learning_rate": 9.995003676954454e-06, "loss": 151.0156, "step": 13960 }, { "epoch": 0.11556437936882161, "grad_norm": 784.6156616210938, "learning_rate": 9.994940443656668e-06, "loss": 198.3028, "step": 13970 }, { "epoch": 0.11564710261819085, "grad_norm": 862.885986328125, "learning_rate": 9.994876812934395e-06, "loss": 153.6012, "step": 13980 }, { "epoch": 0.11572982586756007, "grad_norm": 2193.662109375, "learning_rate": 9.994812784792698e-06, "loss": 165.4299, "step": 13990 }, { "epoch": 0.11581254911692931, "grad_norm": 1005.8052978515625, "learning_rate": 9.99474835923667e-06, "loss": 117.067, "step": 14000 }, { "epoch": 0.11589527236629855, "grad_norm": 870.8054809570312, "learning_rate": 9.994683536271437e-06, "loss": 177.513, "step": 14010 }, { "epoch": 0.11597799561566778, "grad_norm": 1938.4117431640625, "learning_rate": 9.994618315902161e-06, "loss": 147.8295, "step": 14020 }, { "epoch": 0.11606071886503702, "grad_norm": 1145.3619384765625, "learning_rate": 9.994552698134023e-06, "loss": 126.2492, "step": 14030 }, { "epoch": 0.11614344211440625, "grad_norm": 704.4757080078125, "learning_rate": 9.994486682972253e-06, "loss": 183.3489, "step": 14040 }, { "epoch": 0.11622616536377549, "grad_norm": 799.8057250976562, "learning_rate": 9.994420270422096e-06, "loss": 155.8286, "step": 14050 }, { "epoch": 0.11630888861314473, "grad_norm": 2168.39794921875, "learning_rate": 9.994353460488842e-06, "loss": 165.6206, "step": 14060 }, { "epoch": 0.11639161186251396, "grad_norm": 1048.3438720703125, "learning_rate": 9.994286253177803e-06, "loss": 196.4472, "step": 14070 }, { "epoch": 0.1164743351118832, "grad_norm": 1240.358642578125, "learning_rate": 9.994218648494327e-06, "loss": 169.1644, "step": 14080 }, { "epoch": 0.11655705836125244, "grad_norm": 1450.99169921875, "learning_rate": 9.994150646443793e-06, "loss": 119.286, "step": 14090 }, { "epoch": 0.11663978161062166, "grad_norm": 1026.7149658203125, "learning_rate": 9.994082247031613e-06, "loss": 166.7578, "step": 14100 }, { "epoch": 0.1167225048599909, "grad_norm": 1081.6656494140625, "learning_rate": 9.99401345026323e-06, "loss": 141.0757, "step": 14110 }, { "epoch": 0.11680522810936013, "grad_norm": 746.9979248046875, "learning_rate": 9.993944256144115e-06, "loss": 124.9759, "step": 14120 }, { "epoch": 0.11688795135872937, "grad_norm": 891.9210815429688, "learning_rate": 9.993874664679774e-06, "loss": 150.3685, "step": 14130 }, { "epoch": 0.11697067460809861, "grad_norm": 1401.4002685546875, "learning_rate": 9.993804675875744e-06, "loss": 168.8493, "step": 14140 }, { "epoch": 0.11705339785746784, "grad_norm": 1588.7640380859375, "learning_rate": 9.993734289737596e-06, "loss": 141.4464, "step": 14150 }, { "epoch": 0.11713612110683708, "grad_norm": 1859.66552734375, "learning_rate": 9.993663506270928e-06, "loss": 162.024, "step": 14160 }, { "epoch": 0.11721884435620632, "grad_norm": 1133.1839599609375, "learning_rate": 9.993592325481373e-06, "loss": 166.6096, "step": 14170 }, { "epoch": 0.11730156760557554, "grad_norm": 1811.849365234375, "learning_rate": 9.993520747374594e-06, "loss": 127.2197, "step": 14180 }, { "epoch": 0.11738429085494478, "grad_norm": 909.2362060546875, "learning_rate": 9.993448771956285e-06, "loss": 189.4919, "step": 14190 }, { "epoch": 0.11746701410431402, "grad_norm": 1350.44140625, "learning_rate": 9.993376399232175e-06, "loss": 142.4382, "step": 14200 }, { "epoch": 0.11754973735368325, "grad_norm": 1765.2679443359375, "learning_rate": 9.993303629208023e-06, "loss": 148.8411, "step": 14210 }, { "epoch": 0.11763246060305249, "grad_norm": 2343.818359375, "learning_rate": 9.993230461889616e-06, "loss": 212.7168, "step": 14220 }, { "epoch": 0.11771518385242172, "grad_norm": 683.275146484375, "learning_rate": 9.993156897282776e-06, "loss": 148.4446, "step": 14230 }, { "epoch": 0.11779790710179096, "grad_norm": 998.6349487304688, "learning_rate": 9.99308293539336e-06, "loss": 117.4103, "step": 14240 }, { "epoch": 0.1178806303511602, "grad_norm": 823.5191040039062, "learning_rate": 9.993008576227248e-06, "loss": 130.8048, "step": 14250 }, { "epoch": 0.11796335360052942, "grad_norm": 1420.8035888671875, "learning_rate": 9.992933819790358e-06, "loss": 163.5295, "step": 14260 }, { "epoch": 0.11804607684989867, "grad_norm": 1167.037109375, "learning_rate": 9.992858666088638e-06, "loss": 164.6194, "step": 14270 }, { "epoch": 0.1181288000992679, "grad_norm": 1515.985107421875, "learning_rate": 9.992783115128072e-06, "loss": 163.406, "step": 14280 }, { "epoch": 0.11821152334863713, "grad_norm": 1549.8917236328125, "learning_rate": 9.992707166914662e-06, "loss": 168.7726, "step": 14290 }, { "epoch": 0.11829424659800637, "grad_norm": 1190.7861328125, "learning_rate": 9.992630821454458e-06, "loss": 140.9276, "step": 14300 }, { "epoch": 0.1183769698473756, "grad_norm": 1568.7037353515625, "learning_rate": 9.992554078753534e-06, "loss": 147.5554, "step": 14310 }, { "epoch": 0.11845969309674484, "grad_norm": 1000.02880859375, "learning_rate": 9.992476938817994e-06, "loss": 180.1213, "step": 14320 }, { "epoch": 0.11854241634611408, "grad_norm": 1296.5947265625, "learning_rate": 9.992399401653976e-06, "loss": 137.781, "step": 14330 }, { "epoch": 0.1186251395954833, "grad_norm": 1144.0504150390625, "learning_rate": 9.99232146726765e-06, "loss": 140.0204, "step": 14340 }, { "epoch": 0.11870786284485255, "grad_norm": 1281.876708984375, "learning_rate": 9.992243135665217e-06, "loss": 154.8919, "step": 14350 }, { "epoch": 0.11879058609422179, "grad_norm": 871.8610229492188, "learning_rate": 9.992164406852908e-06, "loss": 186.1516, "step": 14360 }, { "epoch": 0.11887330934359101, "grad_norm": 1534.3536376953125, "learning_rate": 9.992085280836988e-06, "loss": 160.6092, "step": 14370 }, { "epoch": 0.11895603259296025, "grad_norm": 988.0948486328125, "learning_rate": 9.992005757623753e-06, "loss": 203.4977, "step": 14380 }, { "epoch": 0.11903875584232948, "grad_norm": 1357.1668701171875, "learning_rate": 9.991925837219532e-06, "loss": 160.5042, "step": 14390 }, { "epoch": 0.11912147909169872, "grad_norm": 636.4329223632812, "learning_rate": 9.991845519630679e-06, "loss": 137.9073, "step": 14400 }, { "epoch": 0.11920420234106796, "grad_norm": 1027.252197265625, "learning_rate": 9.991764804863588e-06, "loss": 122.9028, "step": 14410 }, { "epoch": 0.11928692559043719, "grad_norm": 2356.058837890625, "learning_rate": 9.991683692924682e-06, "loss": 155.1582, "step": 14420 }, { "epoch": 0.11936964883980643, "grad_norm": 743.142578125, "learning_rate": 9.991602183820412e-06, "loss": 142.9998, "step": 14430 }, { "epoch": 0.11945237208917567, "grad_norm": 1039.77978515625, "learning_rate": 9.991520277557266e-06, "loss": 156.9646, "step": 14440 }, { "epoch": 0.1195350953385449, "grad_norm": 1624.250732421875, "learning_rate": 9.991437974141759e-06, "loss": 165.5059, "step": 14450 }, { "epoch": 0.11961781858791413, "grad_norm": 589.4429321289062, "learning_rate": 9.99135527358044e-06, "loss": 119.63, "step": 14460 }, { "epoch": 0.11970054183728338, "grad_norm": 1199.8302001953125, "learning_rate": 9.991272175879888e-06, "loss": 175.7935, "step": 14470 }, { "epoch": 0.1197832650866526, "grad_norm": 1233.6771240234375, "learning_rate": 9.991188681046718e-06, "loss": 192.3081, "step": 14480 }, { "epoch": 0.11986598833602184, "grad_norm": 1209.5980224609375, "learning_rate": 9.991104789087568e-06, "loss": 139.1357, "step": 14490 }, { "epoch": 0.11994871158539107, "grad_norm": 1937.0008544921875, "learning_rate": 9.991020500009118e-06, "loss": 138.8762, "step": 14500 }, { "epoch": 0.12003143483476031, "grad_norm": 545.6674194335938, "learning_rate": 9.990935813818073e-06, "loss": 139.6014, "step": 14510 }, { "epoch": 0.12011415808412955, "grad_norm": 650.5000610351562, "learning_rate": 9.99085073052117e-06, "loss": 195.5592, "step": 14520 }, { "epoch": 0.12019688133349878, "grad_norm": 566.500732421875, "learning_rate": 9.990765250125179e-06, "loss": 154.2413, "step": 14530 }, { "epoch": 0.12027960458286802, "grad_norm": 1750.378662109375, "learning_rate": 9.990679372636902e-06, "loss": 161.8778, "step": 14540 }, { "epoch": 0.12036232783223726, "grad_norm": 1349.2432861328125, "learning_rate": 9.99059309806317e-06, "loss": 172.9573, "step": 14550 }, { "epoch": 0.12044505108160648, "grad_norm": 975.1179809570312, "learning_rate": 9.990506426410851e-06, "loss": 128.1013, "step": 14560 }, { "epoch": 0.12052777433097572, "grad_norm": 3406.892333984375, "learning_rate": 9.990419357686839e-06, "loss": 177.5993, "step": 14570 }, { "epoch": 0.12061049758034495, "grad_norm": 428.1646423339844, "learning_rate": 9.99033189189806e-06, "loss": 128.9519, "step": 14580 }, { "epoch": 0.12069322082971419, "grad_norm": 996.7293701171875, "learning_rate": 9.990244029051475e-06, "loss": 149.3833, "step": 14590 }, { "epoch": 0.12077594407908343, "grad_norm": 1458.9307861328125, "learning_rate": 9.990155769154077e-06, "loss": 160.3518, "step": 14600 }, { "epoch": 0.12085866732845266, "grad_norm": 714.3155517578125, "learning_rate": 9.990067112212884e-06, "loss": 128.5733, "step": 14610 }, { "epoch": 0.1209413905778219, "grad_norm": 629.7801513671875, "learning_rate": 9.989978058234952e-06, "loss": 141.1231, "step": 14620 }, { "epoch": 0.12102411382719114, "grad_norm": 1056.1544189453125, "learning_rate": 9.989888607227369e-06, "loss": 173.8705, "step": 14630 }, { "epoch": 0.12110683707656036, "grad_norm": 1272.472412109375, "learning_rate": 9.989798759197247e-06, "loss": 146.9385, "step": 14640 }, { "epoch": 0.1211895603259296, "grad_norm": 1054.3629150390625, "learning_rate": 9.989708514151739e-06, "loss": 164.1719, "step": 14650 }, { "epoch": 0.12127228357529883, "grad_norm": 659.7613525390625, "learning_rate": 9.989617872098026e-06, "loss": 149.6539, "step": 14660 }, { "epoch": 0.12135500682466807, "grad_norm": 815.8479614257812, "learning_rate": 9.989526833043316e-06, "loss": 140.1702, "step": 14670 }, { "epoch": 0.12143773007403731, "grad_norm": 1148.9129638671875, "learning_rate": 9.989435396994856e-06, "loss": 125.2471, "step": 14680 }, { "epoch": 0.12152045332340654, "grad_norm": 3006.224609375, "learning_rate": 9.989343563959919e-06, "loss": 150.3076, "step": 14690 }, { "epoch": 0.12160317657277578, "grad_norm": 1365.89892578125, "learning_rate": 9.989251333945813e-06, "loss": 179.2145, "step": 14700 }, { "epoch": 0.12168589982214502, "grad_norm": 982.8682861328125, "learning_rate": 9.989158706959875e-06, "loss": 137.9394, "step": 14710 }, { "epoch": 0.12176862307151425, "grad_norm": 1103.151123046875, "learning_rate": 9.989065683009477e-06, "loss": 150.3043, "step": 14720 }, { "epoch": 0.12185134632088349, "grad_norm": 1068.439208984375, "learning_rate": 9.988972262102018e-06, "loss": 115.0475, "step": 14730 }, { "epoch": 0.12193406957025273, "grad_norm": 1475.9112548828125, "learning_rate": 9.988878444244937e-06, "loss": 162.3183, "step": 14740 }, { "epoch": 0.12201679281962195, "grad_norm": 1475.7916259765625, "learning_rate": 9.988784229445689e-06, "loss": 132.9056, "step": 14750 }, { "epoch": 0.12209951606899119, "grad_norm": 1580.335205078125, "learning_rate": 9.988689617711777e-06, "loss": 180.2133, "step": 14760 }, { "epoch": 0.12218223931836042, "grad_norm": 1742.8638916015625, "learning_rate": 9.988594609050726e-06, "loss": 170.6644, "step": 14770 }, { "epoch": 0.12226496256772966, "grad_norm": 778.4093017578125, "learning_rate": 9.988499203470097e-06, "loss": 163.6835, "step": 14780 }, { "epoch": 0.1223476858170989, "grad_norm": 908.4758911132812, "learning_rate": 9.988403400977482e-06, "loss": 143.079, "step": 14790 }, { "epoch": 0.12243040906646813, "grad_norm": 1540.624755859375, "learning_rate": 9.9883072015805e-06, "loss": 160.3763, "step": 14800 }, { "epoch": 0.12251313231583737, "grad_norm": 919.294677734375, "learning_rate": 9.98821060528681e-06, "loss": 165.4283, "step": 14810 }, { "epoch": 0.12259585556520661, "grad_norm": 865.2339477539062, "learning_rate": 9.988113612104093e-06, "loss": 128.951, "step": 14820 }, { "epoch": 0.12267857881457583, "grad_norm": 2098.492919921875, "learning_rate": 9.988016222040067e-06, "loss": 151.1649, "step": 14830 }, { "epoch": 0.12276130206394507, "grad_norm": 847.621337890625, "learning_rate": 9.987918435102484e-06, "loss": 121.6645, "step": 14840 }, { "epoch": 0.1228440253133143, "grad_norm": 1472.7208251953125, "learning_rate": 9.987820251299121e-06, "loss": 140.8588, "step": 14850 }, { "epoch": 0.12292674856268354, "grad_norm": 1310.5726318359375, "learning_rate": 9.987721670637794e-06, "loss": 132.6207, "step": 14860 }, { "epoch": 0.12300947181205278, "grad_norm": 788.9578247070312, "learning_rate": 9.987622693126342e-06, "loss": 139.2334, "step": 14870 }, { "epoch": 0.12309219506142201, "grad_norm": 1761.3287353515625, "learning_rate": 9.987523318772644e-06, "loss": 156.3363, "step": 14880 }, { "epoch": 0.12317491831079125, "grad_norm": 1024.44140625, "learning_rate": 9.987423547584605e-06, "loss": 167.0266, "step": 14890 }, { "epoch": 0.12325764156016049, "grad_norm": 891.505126953125, "learning_rate": 9.987323379570161e-06, "loss": 144.4436, "step": 14900 }, { "epoch": 0.12334036480952972, "grad_norm": 1373.43359375, "learning_rate": 9.987222814737287e-06, "loss": 139.1032, "step": 14910 }, { "epoch": 0.12342308805889896, "grad_norm": 2005.6689453125, "learning_rate": 9.987121853093982e-06, "loss": 179.2018, "step": 14920 }, { "epoch": 0.12350581130826818, "grad_norm": 1276.0216064453125, "learning_rate": 9.987020494648279e-06, "loss": 157.102, "step": 14930 }, { "epoch": 0.12358853455763742, "grad_norm": 1294.2474365234375, "learning_rate": 9.986918739408241e-06, "loss": 176.7196, "step": 14940 }, { "epoch": 0.12367125780700666, "grad_norm": 1715.0291748046875, "learning_rate": 9.986816587381966e-06, "loss": 150.2139, "step": 14950 }, { "epoch": 0.12375398105637589, "grad_norm": 1020.9821166992188, "learning_rate": 9.986714038577582e-06, "loss": 145.5669, "step": 14960 }, { "epoch": 0.12383670430574513, "grad_norm": 969.8027954101562, "learning_rate": 9.986611093003249e-06, "loss": 129.0563, "step": 14970 }, { "epoch": 0.12391942755511437, "grad_norm": 1188.594482421875, "learning_rate": 9.986507750667157e-06, "loss": 130.371, "step": 14980 }, { "epoch": 0.1240021508044836, "grad_norm": 2070.416015625, "learning_rate": 9.986404011577525e-06, "loss": 164.5774, "step": 14990 }, { "epoch": 0.12408487405385284, "grad_norm": 1122.52587890625, "learning_rate": 9.986299875742612e-06, "loss": 187.0694, "step": 15000 }, { "epoch": 0.12416759730322208, "grad_norm": 1119.6961669921875, "learning_rate": 9.986195343170703e-06, "loss": 180.5289, "step": 15010 }, { "epoch": 0.1242503205525913, "grad_norm": 1574.2568359375, "learning_rate": 9.986090413870114e-06, "loss": 144.7522, "step": 15020 }, { "epoch": 0.12433304380196054, "grad_norm": 1006.7045288085938, "learning_rate": 9.985985087849193e-06, "loss": 143.7221, "step": 15030 }, { "epoch": 0.12441576705132977, "grad_norm": 1300.2181396484375, "learning_rate": 9.98587936511632e-06, "loss": 150.1932, "step": 15040 }, { "epoch": 0.12449849030069901, "grad_norm": 821.7041625976562, "learning_rate": 9.98577324567991e-06, "loss": 139.0086, "step": 15050 }, { "epoch": 0.12458121355006825, "grad_norm": 830.7269287109375, "learning_rate": 9.985666729548404e-06, "loss": 146.4651, "step": 15060 }, { "epoch": 0.12466393679943748, "grad_norm": 1310.355224609375, "learning_rate": 9.985559816730277e-06, "loss": 141.5489, "step": 15070 }, { "epoch": 0.12474666004880672, "grad_norm": 1190.0335693359375, "learning_rate": 9.985452507234037e-06, "loss": 144.9001, "step": 15080 }, { "epoch": 0.12482938329817596, "grad_norm": 2714.714599609375, "learning_rate": 9.98534480106822e-06, "loss": 154.9118, "step": 15090 }, { "epoch": 0.12491210654754518, "grad_norm": 792.223388671875, "learning_rate": 9.985236698241396e-06, "loss": 149.7406, "step": 15100 }, { "epoch": 0.12499482979691443, "grad_norm": 1287.8345947265625, "learning_rate": 9.985128198762168e-06, "loss": 171.4261, "step": 15110 }, { "epoch": 0.12507755304628365, "grad_norm": 957.4619140625, "learning_rate": 9.98501930263917e-06, "loss": 234.8733, "step": 15120 }, { "epoch": 0.1251602762956529, "grad_norm": 862.0460205078125, "learning_rate": 9.984910009881062e-06, "loss": 112.6332, "step": 15130 }, { "epoch": 0.12524299954502213, "grad_norm": 1100.07080078125, "learning_rate": 9.984800320496542e-06, "loss": 139.7673, "step": 15140 }, { "epoch": 0.12532572279439136, "grad_norm": 1111.9737548828125, "learning_rate": 9.984690234494338e-06, "loss": 106.7051, "step": 15150 }, { "epoch": 0.12540844604376059, "grad_norm": 744.8794555664062, "learning_rate": 9.98457975188321e-06, "loss": 142.1312, "step": 15160 }, { "epoch": 0.12549116929312984, "grad_norm": 923.189697265625, "learning_rate": 9.984468872671945e-06, "loss": 139.3656, "step": 15170 }, { "epoch": 0.12557389254249907, "grad_norm": 1322.687255859375, "learning_rate": 9.984357596869369e-06, "loss": 148.6495, "step": 15180 }, { "epoch": 0.1256566157918683, "grad_norm": 882.3487548828125, "learning_rate": 9.984245924484334e-06, "loss": 141.7766, "step": 15190 }, { "epoch": 0.12573933904123755, "grad_norm": 725.9840698242188, "learning_rate": 9.984133855525723e-06, "loss": 138.5364, "step": 15200 }, { "epoch": 0.12582206229060677, "grad_norm": 1547.522705078125, "learning_rate": 9.984021390002458e-06, "loss": 136.4458, "step": 15210 }, { "epoch": 0.125904785539976, "grad_norm": 1425.56494140625, "learning_rate": 9.983908527923486e-06, "loss": 222.0387, "step": 15220 }, { "epoch": 0.12598750878934525, "grad_norm": 911.7035522460938, "learning_rate": 9.983795269297782e-06, "loss": 169.7902, "step": 15230 }, { "epoch": 0.12607023203871448, "grad_norm": 1066.0264892578125, "learning_rate": 9.983681614134363e-06, "loss": 122.5573, "step": 15240 }, { "epoch": 0.1261529552880837, "grad_norm": 1829.2509765625, "learning_rate": 9.98356756244227e-06, "loss": 154.7958, "step": 15250 }, { "epoch": 0.12623567853745296, "grad_norm": 1402.93408203125, "learning_rate": 9.983453114230575e-06, "loss": 145.442, "step": 15260 }, { "epoch": 0.1263184017868222, "grad_norm": 990.7800903320312, "learning_rate": 9.98333826950839e-06, "loss": 138.916, "step": 15270 }, { "epoch": 0.12640112503619141, "grad_norm": 861.1292724609375, "learning_rate": 9.983223028284847e-06, "loss": 152.3527, "step": 15280 }, { "epoch": 0.12648384828556067, "grad_norm": 887.3511962890625, "learning_rate": 9.983107390569118e-06, "loss": 129.7973, "step": 15290 }, { "epoch": 0.1265665715349299, "grad_norm": 1043.2373046875, "learning_rate": 9.982991356370404e-06, "loss": 116.1451, "step": 15300 }, { "epoch": 0.12664929478429912, "grad_norm": 1244.5079345703125, "learning_rate": 9.982874925697937e-06, "loss": 221.0664, "step": 15310 }, { "epoch": 0.12673201803366838, "grad_norm": 1715.1995849609375, "learning_rate": 9.982758098560978e-06, "loss": 186.7455, "step": 15320 }, { "epoch": 0.1268147412830376, "grad_norm": 679.9988403320312, "learning_rate": 9.982640874968827e-06, "loss": 171.8672, "step": 15330 }, { "epoch": 0.12689746453240683, "grad_norm": 595.40625, "learning_rate": 9.98252325493081e-06, "loss": 130.3511, "step": 15340 }, { "epoch": 0.12698018778177605, "grad_norm": 915.3275146484375, "learning_rate": 9.982405238456281e-06, "loss": 153.7831, "step": 15350 }, { "epoch": 0.1270629110311453, "grad_norm": 1383.0423583984375, "learning_rate": 9.982286825554636e-06, "loss": 155.1486, "step": 15360 }, { "epoch": 0.12714563428051454, "grad_norm": 1527.0670166015625, "learning_rate": 9.982168016235292e-06, "loss": 235.3831, "step": 15370 }, { "epoch": 0.12722835752988376, "grad_norm": 1168.0416259765625, "learning_rate": 9.982048810507706e-06, "loss": 175.3166, "step": 15380 }, { "epoch": 0.12731108077925302, "grad_norm": 1577.9107666015625, "learning_rate": 9.98192920838136e-06, "loss": 136.4098, "step": 15390 }, { "epoch": 0.12739380402862224, "grad_norm": 2239.125244140625, "learning_rate": 9.98180920986577e-06, "loss": 162.4811, "step": 15400 }, { "epoch": 0.12747652727799147, "grad_norm": 1140.6561279296875, "learning_rate": 9.981688814970485e-06, "loss": 159.3877, "step": 15410 }, { "epoch": 0.12755925052736072, "grad_norm": 929.8948974609375, "learning_rate": 9.981568023705085e-06, "loss": 113.0717, "step": 15420 }, { "epoch": 0.12764197377672995, "grad_norm": 1146.56396484375, "learning_rate": 9.981446836079178e-06, "loss": 121.9914, "step": 15430 }, { "epoch": 0.12772469702609918, "grad_norm": 877.8550415039062, "learning_rate": 9.981325252102408e-06, "loss": 173.6141, "step": 15440 }, { "epoch": 0.12780742027546843, "grad_norm": 969.0079956054688, "learning_rate": 9.98120327178445e-06, "loss": 178.5706, "step": 15450 }, { "epoch": 0.12789014352483766, "grad_norm": 1263.2391357421875, "learning_rate": 9.981080895135007e-06, "loss": 180.7431, "step": 15460 }, { "epoch": 0.12797286677420688, "grad_norm": 742.5184326171875, "learning_rate": 9.980958122163818e-06, "loss": 111.0224, "step": 15470 }, { "epoch": 0.12805559002357614, "grad_norm": 1423.19873046875, "learning_rate": 9.980834952880652e-06, "loss": 128.3473, "step": 15480 }, { "epoch": 0.12813831327294536, "grad_norm": 1583.2940673828125, "learning_rate": 9.980711387295306e-06, "loss": 149.955, "step": 15490 }, { "epoch": 0.1282210365223146, "grad_norm": 1052.7265625, "learning_rate": 9.980587425417612e-06, "loss": 159.9205, "step": 15500 }, { "epoch": 0.12830375977168385, "grad_norm": 2138.17724609375, "learning_rate": 9.980463067257437e-06, "loss": 169.7366, "step": 15510 }, { "epoch": 0.12838648302105307, "grad_norm": 1006.1878662109375, "learning_rate": 9.980338312824672e-06, "loss": 193.1612, "step": 15520 }, { "epoch": 0.1284692062704223, "grad_norm": 1047.7593994140625, "learning_rate": 9.980213162129244e-06, "loss": 175.5892, "step": 15530 }, { "epoch": 0.12855192951979152, "grad_norm": 1267.4644775390625, "learning_rate": 9.980087615181111e-06, "loss": 149.4357, "step": 15540 }, { "epoch": 0.12863465276916078, "grad_norm": 1171.0859375, "learning_rate": 9.979961671990263e-06, "loss": 165.6414, "step": 15550 }, { "epoch": 0.12871737601853, "grad_norm": 911.0418701171875, "learning_rate": 9.979835332566719e-06, "loss": 155.2462, "step": 15560 }, { "epoch": 0.12880009926789923, "grad_norm": 1016.1674194335938, "learning_rate": 9.97970859692053e-06, "loss": 142.4974, "step": 15570 }, { "epoch": 0.12888282251726849, "grad_norm": 653.232421875, "learning_rate": 9.979581465061784e-06, "loss": 155.5012, "step": 15580 }, { "epoch": 0.1289655457666377, "grad_norm": 1058.5008544921875, "learning_rate": 9.979453937000594e-06, "loss": 101.9423, "step": 15590 }, { "epoch": 0.12904826901600694, "grad_norm": 820.3455200195312, "learning_rate": 9.979326012747106e-06, "loss": 117.5258, "step": 15600 }, { "epoch": 0.1291309922653762, "grad_norm": 1352.7105712890625, "learning_rate": 9.9791976923115e-06, "loss": 99.8209, "step": 15610 }, { "epoch": 0.12921371551474542, "grad_norm": 1026.9713134765625, "learning_rate": 9.979068975703984e-06, "loss": 166.7305, "step": 15620 }, { "epoch": 0.12929643876411465, "grad_norm": 1245.904296875, "learning_rate": 9.978939862934802e-06, "loss": 126.3938, "step": 15630 }, { "epoch": 0.1293791620134839, "grad_norm": 1658.7640380859375, "learning_rate": 9.978810354014223e-06, "loss": 135.5493, "step": 15640 }, { "epoch": 0.12946188526285313, "grad_norm": 1517.82373046875, "learning_rate": 9.978680448952556e-06, "loss": 139.2036, "step": 15650 }, { "epoch": 0.12954460851222235, "grad_norm": 1112.2469482421875, "learning_rate": 9.978550147760133e-06, "loss": 127.4167, "step": 15660 }, { "epoch": 0.1296273317615916, "grad_norm": 995.6966552734375, "learning_rate": 9.978419450447325e-06, "loss": 128.1456, "step": 15670 }, { "epoch": 0.12971005501096083, "grad_norm": 785.5421142578125, "learning_rate": 9.978288357024527e-06, "loss": 142.7447, "step": 15680 }, { "epoch": 0.12979277826033006, "grad_norm": 3418.008544921875, "learning_rate": 9.978156867502173e-06, "loss": 161.2918, "step": 15690 }, { "epoch": 0.12987550150969931, "grad_norm": 1545.32568359375, "learning_rate": 9.978024981890724e-06, "loss": 107.0028, "step": 15700 }, { "epoch": 0.12995822475906854, "grad_norm": 963.7340087890625, "learning_rate": 9.977892700200673e-06, "loss": 140.569, "step": 15710 }, { "epoch": 0.13004094800843777, "grad_norm": 692.7611694335938, "learning_rate": 9.977760022442545e-06, "loss": 110.664, "step": 15720 }, { "epoch": 0.130123671257807, "grad_norm": 1015.7322998046875, "learning_rate": 9.977626948626897e-06, "loss": 158.9243, "step": 15730 }, { "epoch": 0.13020639450717625, "grad_norm": 1334.6917724609375, "learning_rate": 9.977493478764316e-06, "loss": 152.3215, "step": 15740 }, { "epoch": 0.13028911775654548, "grad_norm": 963.1575927734375, "learning_rate": 9.977359612865424e-06, "loss": 137.1868, "step": 15750 }, { "epoch": 0.1303718410059147, "grad_norm": 1332.0909423828125, "learning_rate": 9.97722535094087e-06, "loss": 130.2153, "step": 15760 }, { "epoch": 0.13045456425528396, "grad_norm": 864.9472045898438, "learning_rate": 9.977090693001336e-06, "loss": 142.6017, "step": 15770 }, { "epoch": 0.13053728750465318, "grad_norm": 1091.3128662109375, "learning_rate": 9.976955639057539e-06, "loss": 126.0693, "step": 15780 }, { "epoch": 0.1306200107540224, "grad_norm": 1137.7115478515625, "learning_rate": 9.976820189120223e-06, "loss": 147.4185, "step": 15790 }, { "epoch": 0.13070273400339166, "grad_norm": 1658.2982177734375, "learning_rate": 9.976684343200164e-06, "loss": 135.441, "step": 15800 }, { "epoch": 0.1307854572527609, "grad_norm": 1823.3642578125, "learning_rate": 9.976548101308173e-06, "loss": 138.8229, "step": 15810 }, { "epoch": 0.13086818050213012, "grad_norm": 1511.6375732421875, "learning_rate": 9.976411463455088e-06, "loss": 140.3549, "step": 15820 }, { "epoch": 0.13095090375149937, "grad_norm": 970.9559326171875, "learning_rate": 9.976274429651783e-06, "loss": 188.4605, "step": 15830 }, { "epoch": 0.1310336270008686, "grad_norm": 1540.7110595703125, "learning_rate": 9.976136999909156e-06, "loss": 106.2589, "step": 15840 }, { "epoch": 0.13111635025023782, "grad_norm": 829.7328491210938, "learning_rate": 9.97599917423815e-06, "loss": 166.2885, "step": 15850 }, { "epoch": 0.13119907349960708, "grad_norm": 0.0, "learning_rate": 9.975860952649724e-06, "loss": 180.9173, "step": 15860 }, { "epoch": 0.1312817967489763, "grad_norm": 1011.6902465820312, "learning_rate": 9.975722335154876e-06, "loss": 161.2201, "step": 15870 }, { "epoch": 0.13136451999834553, "grad_norm": 1166.7960205078125, "learning_rate": 9.975583321764638e-06, "loss": 144.3113, "step": 15880 }, { "epoch": 0.13144724324771476, "grad_norm": 1041.4771728515625, "learning_rate": 9.975443912490073e-06, "loss": 149.5042, "step": 15890 }, { "epoch": 0.131529966497084, "grad_norm": 2316.5087890625, "learning_rate": 9.975304107342268e-06, "loss": 179.2303, "step": 15900 }, { "epoch": 0.13161268974645324, "grad_norm": 692.1578369140625, "learning_rate": 9.97516390633235e-06, "loss": 133.4318, "step": 15910 }, { "epoch": 0.13169541299582246, "grad_norm": 1125.5006103515625, "learning_rate": 9.975023309471473e-06, "loss": 156.1001, "step": 15920 }, { "epoch": 0.13177813624519172, "grad_norm": 1064.159423828125, "learning_rate": 9.974882316770823e-06, "loss": 147.876, "step": 15930 }, { "epoch": 0.13186085949456094, "grad_norm": 1456.9761962890625, "learning_rate": 9.974740928241617e-06, "loss": 146.098, "step": 15940 }, { "epoch": 0.13194358274393017, "grad_norm": 1191.022705078125, "learning_rate": 9.974599143895107e-06, "loss": 139.6693, "step": 15950 }, { "epoch": 0.13202630599329943, "grad_norm": 2010.4088134765625, "learning_rate": 9.974456963742573e-06, "loss": 152.4677, "step": 15960 }, { "epoch": 0.13210902924266865, "grad_norm": 1077.85205078125, "learning_rate": 9.97431438779533e-06, "loss": 199.6097, "step": 15970 }, { "epoch": 0.13219175249203788, "grad_norm": 975.5093994140625, "learning_rate": 9.974171416064719e-06, "loss": 110.029, "step": 15980 }, { "epoch": 0.13227447574140713, "grad_norm": 1180.7437744140625, "learning_rate": 9.974028048562118e-06, "loss": 136.7102, "step": 15990 }, { "epoch": 0.13235719899077636, "grad_norm": 1241.7110595703125, "learning_rate": 9.973884285298932e-06, "loss": 154.7749, "step": 16000 }, { "epoch": 0.13243992224014559, "grad_norm": 1181.715576171875, "learning_rate": 9.9737401262866e-06, "loss": 163.9049, "step": 16010 }, { "epoch": 0.13252264548951484, "grad_norm": 1441.7060546875, "learning_rate": 9.973595571536593e-06, "loss": 131.6654, "step": 16020 }, { "epoch": 0.13260536873888407, "grad_norm": 1810.145751953125, "learning_rate": 9.973450621060412e-06, "loss": 155.4361, "step": 16030 }, { "epoch": 0.1326880919882533, "grad_norm": 1024.084716796875, "learning_rate": 9.97330527486959e-06, "loss": 130.6234, "step": 16040 }, { "epoch": 0.13277081523762255, "grad_norm": 1294.561279296875, "learning_rate": 9.973159532975691e-06, "loss": 122.6079, "step": 16050 }, { "epoch": 0.13285353848699177, "grad_norm": 1282.573486328125, "learning_rate": 9.973013395390314e-06, "loss": 173.6021, "step": 16060 }, { "epoch": 0.132936261736361, "grad_norm": 1436.6795654296875, "learning_rate": 9.972866862125083e-06, "loss": 201.6667, "step": 16070 }, { "epoch": 0.13301898498573023, "grad_norm": 880.5997924804688, "learning_rate": 9.972719933191657e-06, "loss": 121.1312, "step": 16080 }, { "epoch": 0.13310170823509948, "grad_norm": 720.5911254882812, "learning_rate": 9.97257260860173e-06, "loss": 117.1484, "step": 16090 }, { "epoch": 0.1331844314844687, "grad_norm": 1505.3927001953125, "learning_rate": 9.972424888367019e-06, "loss": 146.7309, "step": 16100 }, { "epoch": 0.13326715473383793, "grad_norm": 958.6402587890625, "learning_rate": 9.972276772499281e-06, "loss": 156.9766, "step": 16110 }, { "epoch": 0.1333498779832072, "grad_norm": 877.50244140625, "learning_rate": 9.9721282610103e-06, "loss": 191.0899, "step": 16120 }, { "epoch": 0.13343260123257641, "grad_norm": 1021.2138671875, "learning_rate": 9.971979353911891e-06, "loss": 133.9165, "step": 16130 }, { "epoch": 0.13351532448194564, "grad_norm": 847.0870971679688, "learning_rate": 9.971830051215905e-06, "loss": 101.3374, "step": 16140 }, { "epoch": 0.1335980477313149, "grad_norm": 2785.597412109375, "learning_rate": 9.97168035293422e-06, "loss": 267.7292, "step": 16150 }, { "epoch": 0.13368077098068412, "grad_norm": 801.3421020507812, "learning_rate": 9.971530259078743e-06, "loss": 111.4734, "step": 16160 }, { "epoch": 0.13376349423005335, "grad_norm": 768.2542114257812, "learning_rate": 9.971379769661422e-06, "loss": 149.4196, "step": 16170 }, { "epoch": 0.1338462174794226, "grad_norm": 893.0291748046875, "learning_rate": 9.971228884694228e-06, "loss": 122.37, "step": 16180 }, { "epoch": 0.13392894072879183, "grad_norm": 1295.072509765625, "learning_rate": 9.971077604189166e-06, "loss": 156.3286, "step": 16190 }, { "epoch": 0.13401166397816106, "grad_norm": 998.2085571289062, "learning_rate": 9.970925928158275e-06, "loss": 122.403, "step": 16200 }, { "epoch": 0.1340943872275303, "grad_norm": 701.3370361328125, "learning_rate": 9.970773856613617e-06, "loss": 140.6802, "step": 16210 }, { "epoch": 0.13417711047689954, "grad_norm": 971.6983032226562, "learning_rate": 9.970621389567301e-06, "loss": 178.1052, "step": 16220 }, { "epoch": 0.13425983372626876, "grad_norm": 2665.119384765625, "learning_rate": 9.97046852703145e-06, "loss": 138.6044, "step": 16230 }, { "epoch": 0.13434255697563802, "grad_norm": 2127.31884765625, "learning_rate": 9.970315269018231e-06, "loss": 157.2493, "step": 16240 }, { "epoch": 0.13442528022500724, "grad_norm": 1778.2391357421875, "learning_rate": 9.970161615539837e-06, "loss": 134.0471, "step": 16250 }, { "epoch": 0.13450800347437647, "grad_norm": 993.4716796875, "learning_rate": 9.970007566608492e-06, "loss": 146.2506, "step": 16260 }, { "epoch": 0.1345907267237457, "grad_norm": 798.2664184570312, "learning_rate": 9.969853122236455e-06, "loss": 114.1296, "step": 16270 }, { "epoch": 0.13467344997311495, "grad_norm": 703.0869750976562, "learning_rate": 9.969698282436013e-06, "loss": 120.5299, "step": 16280 }, { "epoch": 0.13475617322248418, "grad_norm": 1201.6317138671875, "learning_rate": 9.969543047219487e-06, "loss": 125.8007, "step": 16290 }, { "epoch": 0.1348388964718534, "grad_norm": 1785.0177001953125, "learning_rate": 9.969387416599227e-06, "loss": 144.5029, "step": 16300 }, { "epoch": 0.13492161972122266, "grad_norm": 1228.9619140625, "learning_rate": 9.969231390587618e-06, "loss": 164.9693, "step": 16310 }, { "epoch": 0.13500434297059188, "grad_norm": 864.3604736328125, "learning_rate": 9.969074969197072e-06, "loss": 168.7043, "step": 16320 }, { "epoch": 0.1350870662199611, "grad_norm": 1214.023681640625, "learning_rate": 9.968918152440036e-06, "loss": 172.751, "step": 16330 }, { "epoch": 0.13516978946933036, "grad_norm": 928.501220703125, "learning_rate": 9.968760940328987e-06, "loss": 131.5311, "step": 16340 }, { "epoch": 0.1352525127186996, "grad_norm": 510.1147155761719, "learning_rate": 9.968603332876435e-06, "loss": 171.1721, "step": 16350 }, { "epoch": 0.13533523596806882, "grad_norm": 1110.3807373046875, "learning_rate": 9.968445330094915e-06, "loss": 169.255, "step": 16360 }, { "epoch": 0.13541795921743807, "grad_norm": 1672.8614501953125, "learning_rate": 9.968286931997004e-06, "loss": 112.5926, "step": 16370 }, { "epoch": 0.1355006824668073, "grad_norm": 1014.0128784179688, "learning_rate": 9.968128138595304e-06, "loss": 100.9882, "step": 16380 }, { "epoch": 0.13558340571617652, "grad_norm": 1446.147216796875, "learning_rate": 9.967968949902448e-06, "loss": 185.0402, "step": 16390 }, { "epoch": 0.13566612896554578, "grad_norm": 753.0343627929688, "learning_rate": 9.967809365931102e-06, "loss": 148.759, "step": 16400 }, { "epoch": 0.135748852214915, "grad_norm": 909.8871459960938, "learning_rate": 9.967649386693964e-06, "loss": 123.6662, "step": 16410 }, { "epoch": 0.13583157546428423, "grad_norm": 1223.7244873046875, "learning_rate": 9.967489012203765e-06, "loss": 132.6178, "step": 16420 }, { "epoch": 0.13591429871365346, "grad_norm": 1106.0858154296875, "learning_rate": 9.967328242473261e-06, "loss": 146.9553, "step": 16430 }, { "epoch": 0.1359970219630227, "grad_norm": 1789.8829345703125, "learning_rate": 9.967167077515246e-06, "loss": 133.0784, "step": 16440 }, { "epoch": 0.13607974521239194, "grad_norm": 741.1414184570312, "learning_rate": 9.967005517342544e-06, "loss": 143.1583, "step": 16450 }, { "epoch": 0.13616246846176117, "grad_norm": 1324.021240234375, "learning_rate": 9.966843561968005e-06, "loss": 108.1861, "step": 16460 }, { "epoch": 0.13624519171113042, "grad_norm": 866.0011596679688, "learning_rate": 9.966681211404521e-06, "loss": 138.6324, "step": 16470 }, { "epoch": 0.13632791496049965, "grad_norm": 520.3377685546875, "learning_rate": 9.966518465665007e-06, "loss": 113.3134, "step": 16480 }, { "epoch": 0.13641063820986887, "grad_norm": 883.1153564453125, "learning_rate": 9.966355324762412e-06, "loss": 163.313, "step": 16490 }, { "epoch": 0.13649336145923813, "grad_norm": 1007.1843872070312, "learning_rate": 9.966191788709716e-06, "loss": 140.2184, "step": 16500 }, { "epoch": 0.13657608470860735, "grad_norm": 1669.2816162109375, "learning_rate": 9.966027857519931e-06, "loss": 188.2176, "step": 16510 }, { "epoch": 0.13665880795797658, "grad_norm": 772.6116943359375, "learning_rate": 9.9658635312061e-06, "loss": 163.7544, "step": 16520 }, { "epoch": 0.13674153120734583, "grad_norm": 706.4850463867188, "learning_rate": 9.965698809781298e-06, "loss": 121.3989, "step": 16530 }, { "epoch": 0.13682425445671506, "grad_norm": 766.0828247070312, "learning_rate": 9.965533693258632e-06, "loss": 213.4713, "step": 16540 }, { "epoch": 0.1369069777060843, "grad_norm": 957.917724609375, "learning_rate": 9.965368181651239e-06, "loss": 183.1273, "step": 16550 }, { "epoch": 0.13698970095545354, "grad_norm": 696.8062744140625, "learning_rate": 9.965202274972288e-06, "loss": 112.6891, "step": 16560 }, { "epoch": 0.13707242420482277, "grad_norm": 902.3621215820312, "learning_rate": 9.965035973234977e-06, "loss": 113.6838, "step": 16570 }, { "epoch": 0.137155147454192, "grad_norm": 1020.390625, "learning_rate": 9.964869276452542e-06, "loss": 106.0109, "step": 16580 }, { "epoch": 0.13723787070356125, "grad_norm": 1181.8326416015625, "learning_rate": 9.964702184638244e-06, "loss": 139.7021, "step": 16590 }, { "epoch": 0.13732059395293048, "grad_norm": 629.9285278320312, "learning_rate": 9.964534697805377e-06, "loss": 193.1732, "step": 16600 }, { "epoch": 0.1374033172022997, "grad_norm": 1531.7962646484375, "learning_rate": 9.96436681596727e-06, "loss": 154.7776, "step": 16610 }, { "epoch": 0.13748604045166893, "grad_norm": 1220.1796875, "learning_rate": 9.964198539137277e-06, "loss": 191.2195, "step": 16620 }, { "epoch": 0.13756876370103818, "grad_norm": 0.0, "learning_rate": 9.964029867328791e-06, "loss": 112.8693, "step": 16630 }, { "epoch": 0.1376514869504074, "grad_norm": 1105.817626953125, "learning_rate": 9.963860800555228e-06, "loss": 103.0777, "step": 16640 }, { "epoch": 0.13773421019977664, "grad_norm": 472.4584655761719, "learning_rate": 9.963691338830045e-06, "loss": 123.1952, "step": 16650 }, { "epoch": 0.1378169334491459, "grad_norm": 990.940673828125, "learning_rate": 9.963521482166718e-06, "loss": 136.4567, "step": 16660 }, { "epoch": 0.13789965669851512, "grad_norm": 1503.9461669921875, "learning_rate": 9.96335123057877e-06, "loss": 136.2858, "step": 16670 }, { "epoch": 0.13798237994788434, "grad_norm": 1348.58740234375, "learning_rate": 9.963180584079741e-06, "loss": 137.5341, "step": 16680 }, { "epoch": 0.1380651031972536, "grad_norm": 1100.0037841796875, "learning_rate": 9.963009542683214e-06, "loss": 199.9709, "step": 16690 }, { "epoch": 0.13814782644662282, "grad_norm": 718.8609619140625, "learning_rate": 9.962838106402791e-06, "loss": 184.6782, "step": 16700 }, { "epoch": 0.13823054969599205, "grad_norm": 865.5576782226562, "learning_rate": 9.962666275252117e-06, "loss": 104.1854, "step": 16710 }, { "epoch": 0.1383132729453613, "grad_norm": 1161.63525390625, "learning_rate": 9.962494049244866e-06, "loss": 169.3983, "step": 16720 }, { "epoch": 0.13839599619473053, "grad_norm": 589.0774536132812, "learning_rate": 9.962321428394735e-06, "loss": 165.776, "step": 16730 }, { "epoch": 0.13847871944409976, "grad_norm": 2693.160888671875, "learning_rate": 9.962148412715464e-06, "loss": 154.1448, "step": 16740 }, { "epoch": 0.138561442693469, "grad_norm": 1310.2269287109375, "learning_rate": 9.961975002220816e-06, "loss": 166.3599, "step": 16750 }, { "epoch": 0.13864416594283824, "grad_norm": 1167.153076171875, "learning_rate": 9.96180119692459e-06, "loss": 171.0495, "step": 16760 }, { "epoch": 0.13872688919220746, "grad_norm": 1377.29833984375, "learning_rate": 9.961626996840613e-06, "loss": 102.7167, "step": 16770 }, { "epoch": 0.13880961244157672, "grad_norm": 977.5831909179688, "learning_rate": 9.961452401982748e-06, "loss": 136.4004, "step": 16780 }, { "epoch": 0.13889233569094595, "grad_norm": 1010.1982421875, "learning_rate": 9.961277412364884e-06, "loss": 146.971, "step": 16790 }, { "epoch": 0.13897505894031517, "grad_norm": 814.7576293945312, "learning_rate": 9.961102028000948e-06, "loss": 213.2676, "step": 16800 }, { "epoch": 0.1390577821896844, "grad_norm": 881.7014770507812, "learning_rate": 9.96092624890489e-06, "loss": 91.0271, "step": 16810 }, { "epoch": 0.13914050543905365, "grad_norm": 4899.205078125, "learning_rate": 9.960750075090698e-06, "loss": 166.8467, "step": 16820 }, { "epoch": 0.13922322868842288, "grad_norm": 1270.030029296875, "learning_rate": 9.960573506572391e-06, "loss": 186.535, "step": 16830 }, { "epoch": 0.1393059519377921, "grad_norm": 1338.3089599609375, "learning_rate": 9.960396543364013e-06, "loss": 192.4324, "step": 16840 }, { "epoch": 0.13938867518716136, "grad_norm": 1512.3917236328125, "learning_rate": 9.96021918547965e-06, "loss": 124.9194, "step": 16850 }, { "epoch": 0.13947139843653059, "grad_norm": 1637.7535400390625, "learning_rate": 9.96004143293341e-06, "loss": 131.2566, "step": 16860 }, { "epoch": 0.1395541216858998, "grad_norm": 1564.211669921875, "learning_rate": 9.959863285739436e-06, "loss": 124.8255, "step": 16870 }, { "epoch": 0.13963684493526907, "grad_norm": 720.8834228515625, "learning_rate": 9.959684743911904e-06, "loss": 140.7759, "step": 16880 }, { "epoch": 0.1397195681846383, "grad_norm": 796.6300659179688, "learning_rate": 9.959505807465018e-06, "loss": 120.1176, "step": 16890 }, { "epoch": 0.13980229143400752, "grad_norm": 1232.4276123046875, "learning_rate": 9.959326476413016e-06, "loss": 130.2664, "step": 16900 }, { "epoch": 0.13988501468337677, "grad_norm": 457.3919677734375, "learning_rate": 9.959146750770167e-06, "loss": 124.8512, "step": 16910 }, { "epoch": 0.139967737932746, "grad_norm": 708.2092895507812, "learning_rate": 9.95896663055077e-06, "loss": 120.5444, "step": 16920 }, { "epoch": 0.14005046118211523, "grad_norm": 995.7003784179688, "learning_rate": 9.958786115769157e-06, "loss": 114.9213, "step": 16930 }, { "epoch": 0.14013318443148448, "grad_norm": 1515.4827880859375, "learning_rate": 9.958605206439692e-06, "loss": 146.7894, "step": 16940 }, { "epoch": 0.1402159076808537, "grad_norm": 814.6317138671875, "learning_rate": 9.958423902576764e-06, "loss": 99.1024, "step": 16950 }, { "epoch": 0.14029863093022293, "grad_norm": 760.5602416992188, "learning_rate": 9.958242204194804e-06, "loss": 160.827, "step": 16960 }, { "epoch": 0.1403813541795922, "grad_norm": 738.33349609375, "learning_rate": 9.958060111308267e-06, "loss": 136.0457, "step": 16970 }, { "epoch": 0.14046407742896141, "grad_norm": 1149.28857421875, "learning_rate": 9.957877623931642e-06, "loss": 151.1577, "step": 16980 }, { "epoch": 0.14054680067833064, "grad_norm": 1362.2108154296875, "learning_rate": 9.95769474207945e-06, "loss": 173.5694, "step": 16990 }, { "epoch": 0.14062952392769987, "grad_norm": 1314.1846923828125, "learning_rate": 9.957511465766236e-06, "loss": 169.4035, "step": 17000 }, { "epoch": 0.14071224717706912, "grad_norm": 1065.3922119140625, "learning_rate": 9.957327795006589e-06, "loss": 169.1779, "step": 17010 }, { "epoch": 0.14079497042643835, "grad_norm": 1382.638427734375, "learning_rate": 9.95714372981512e-06, "loss": 145.6161, "step": 17020 }, { "epoch": 0.14087769367580757, "grad_norm": 1485.4481201171875, "learning_rate": 9.956959270206474e-06, "loss": 131.7884, "step": 17030 }, { "epoch": 0.14096041692517683, "grad_norm": 901.7747192382812, "learning_rate": 9.956774416195329e-06, "loss": 129.2612, "step": 17040 }, { "epoch": 0.14104314017454606, "grad_norm": 1346.950439453125, "learning_rate": 9.956589167796392e-06, "loss": 108.1172, "step": 17050 }, { "epoch": 0.14112586342391528, "grad_norm": 857.8418579101562, "learning_rate": 9.956403525024402e-06, "loss": 132.697, "step": 17060 }, { "epoch": 0.14120858667328454, "grad_norm": 1938.1868896484375, "learning_rate": 9.956217487894131e-06, "loss": 165.6452, "step": 17070 }, { "epoch": 0.14129130992265376, "grad_norm": 749.3518676757812, "learning_rate": 9.95603105642038e-06, "loss": 212.4321, "step": 17080 }, { "epoch": 0.141374033172023, "grad_norm": 709.408447265625, "learning_rate": 9.955844230617985e-06, "loss": 156.41, "step": 17090 }, { "epoch": 0.14145675642139224, "grad_norm": 1008.6261596679688, "learning_rate": 9.955657010501807e-06, "loss": 118.0272, "step": 17100 }, { "epoch": 0.14153947967076147, "grad_norm": 828.895751953125, "learning_rate": 9.955469396086743e-06, "loss": 138.8411, "step": 17110 }, { "epoch": 0.1416222029201307, "grad_norm": 1362.32421875, "learning_rate": 9.955281387387724e-06, "loss": 145.7589, "step": 17120 }, { "epoch": 0.14170492616949995, "grad_norm": 1597.079345703125, "learning_rate": 9.955092984419705e-06, "loss": 170.475, "step": 17130 }, { "epoch": 0.14178764941886918, "grad_norm": 1059.4306640625, "learning_rate": 9.954904187197679e-06, "loss": 158.0434, "step": 17140 }, { "epoch": 0.1418703726682384, "grad_norm": 694.0506591796875, "learning_rate": 9.954714995736667e-06, "loss": 142.6755, "step": 17150 }, { "epoch": 0.14195309591760763, "grad_norm": 1392.7862548828125, "learning_rate": 9.95452541005172e-06, "loss": 192.9698, "step": 17160 }, { "epoch": 0.14203581916697688, "grad_norm": 1239.712646484375, "learning_rate": 9.954335430157926e-06, "loss": 126.2119, "step": 17170 }, { "epoch": 0.1421185424163461, "grad_norm": 949.230712890625, "learning_rate": 9.9541450560704e-06, "loss": 76.8772, "step": 17180 }, { "epoch": 0.14220126566571534, "grad_norm": 1190.4364013671875, "learning_rate": 9.953954287804286e-06, "loss": 156.7768, "step": 17190 }, { "epoch": 0.1422839889150846, "grad_norm": 1422.4742431640625, "learning_rate": 9.953763125374767e-06, "loss": 107.7513, "step": 17200 }, { "epoch": 0.14236671216445382, "grad_norm": 1076.408935546875, "learning_rate": 9.953571568797049e-06, "loss": 136.0641, "step": 17210 }, { "epoch": 0.14244943541382304, "grad_norm": 930.828125, "learning_rate": 9.953379618086377e-06, "loss": 143.9599, "step": 17220 }, { "epoch": 0.1425321586631923, "grad_norm": 1367.8873291015625, "learning_rate": 9.95318727325802e-06, "loss": 128.7768, "step": 17230 }, { "epoch": 0.14261488191256153, "grad_norm": 1150.171875, "learning_rate": 9.952994534327283e-06, "loss": 124.427, "step": 17240 }, { "epoch": 0.14269760516193075, "grad_norm": 821.237548828125, "learning_rate": 9.952801401309504e-06, "loss": 137.096, "step": 17250 }, { "epoch": 0.1427803284113, "grad_norm": 1357.8616943359375, "learning_rate": 9.952607874220048e-06, "loss": 201.047, "step": 17260 }, { "epoch": 0.14286305166066923, "grad_norm": 1452.91650390625, "learning_rate": 9.952413953074312e-06, "loss": 199.8793, "step": 17270 }, { "epoch": 0.14294577491003846, "grad_norm": 965.8828125, "learning_rate": 9.952219637887725e-06, "loss": 129.7407, "step": 17280 }, { "epoch": 0.1430284981594077, "grad_norm": 1721.4344482421875, "learning_rate": 9.952024928675752e-06, "loss": 177.8543, "step": 17290 }, { "epoch": 0.14311122140877694, "grad_norm": 3541.317626953125, "learning_rate": 9.951829825453881e-06, "loss": 167.7698, "step": 17300 }, { "epoch": 0.14319394465814617, "grad_norm": 2036.2423095703125, "learning_rate": 9.951634328237635e-06, "loss": 141.8449, "step": 17310 }, { "epoch": 0.14327666790751542, "grad_norm": 880.5416870117188, "learning_rate": 9.951438437042572e-06, "loss": 198.8033, "step": 17320 }, { "epoch": 0.14335939115688465, "grad_norm": 807.236572265625, "learning_rate": 9.951242151884275e-06, "loss": 112.0078, "step": 17330 }, { "epoch": 0.14344211440625387, "grad_norm": 1530.7301025390625, "learning_rate": 9.951045472778365e-06, "loss": 133.3953, "step": 17340 }, { "epoch": 0.1435248376556231, "grad_norm": 1775.3485107421875, "learning_rate": 9.950848399740488e-06, "loss": 132.5112, "step": 17350 }, { "epoch": 0.14360756090499235, "grad_norm": 1216.1314697265625, "learning_rate": 9.950650932786325e-06, "loss": 150.7454, "step": 17360 }, { "epoch": 0.14369028415436158, "grad_norm": 756.1212158203125, "learning_rate": 9.95045307193159e-06, "loss": 114.4585, "step": 17370 }, { "epoch": 0.1437730074037308, "grad_norm": 987.248779296875, "learning_rate": 9.95025481719202e-06, "loss": 140.8504, "step": 17380 }, { "epoch": 0.14385573065310006, "grad_norm": 1126.249267578125, "learning_rate": 9.950056168583395e-06, "loss": 225.9696, "step": 17390 }, { "epoch": 0.1439384539024693, "grad_norm": 706.3463745117188, "learning_rate": 9.949857126121519e-06, "loss": 113.696, "step": 17400 }, { "epoch": 0.14402117715183851, "grad_norm": 892.3402099609375, "learning_rate": 9.949657689822226e-06, "loss": 162.9231, "step": 17410 }, { "epoch": 0.14410390040120777, "grad_norm": 856.6466674804688, "learning_rate": 9.949457859701388e-06, "loss": 99.4635, "step": 17420 }, { "epoch": 0.144186623650577, "grad_norm": 775.4996948242188, "learning_rate": 9.949257635774903e-06, "loss": 152.7363, "step": 17430 }, { "epoch": 0.14426934689994622, "grad_norm": 842.1768798828125, "learning_rate": 9.9490570180587e-06, "loss": 85.8346, "step": 17440 }, { "epoch": 0.14435207014931548, "grad_norm": 1798.95849609375, "learning_rate": 9.948856006568746e-06, "loss": 197.5757, "step": 17450 }, { "epoch": 0.1444347933986847, "grad_norm": 1381.5155029296875, "learning_rate": 9.94865460132103e-06, "loss": 150.2531, "step": 17460 }, { "epoch": 0.14451751664805393, "grad_norm": 997.7630004882812, "learning_rate": 9.948452802331578e-06, "loss": 133.1603, "step": 17470 }, { "epoch": 0.14460023989742318, "grad_norm": 1275.1690673828125, "learning_rate": 9.948250609616449e-06, "loss": 168.5733, "step": 17480 }, { "epoch": 0.1446829631467924, "grad_norm": 1112.8721923828125, "learning_rate": 9.948048023191728e-06, "loss": 182.301, "step": 17490 }, { "epoch": 0.14476568639616164, "grad_norm": 950.4414672851562, "learning_rate": 9.947845043073533e-06, "loss": 149.5477, "step": 17500 }, { "epoch": 0.1448484096455309, "grad_norm": 1122.95751953125, "learning_rate": 9.947641669278016e-06, "loss": 123.1119, "step": 17510 }, { "epoch": 0.14493113289490012, "grad_norm": 1148.9334716796875, "learning_rate": 9.947437901821358e-06, "loss": 128.3063, "step": 17520 }, { "epoch": 0.14501385614426934, "grad_norm": 1392.179443359375, "learning_rate": 9.947233740719772e-06, "loss": 139.3278, "step": 17530 }, { "epoch": 0.14509657939363857, "grad_norm": 604.5231323242188, "learning_rate": 9.947029185989501e-06, "loss": 163.2896, "step": 17540 }, { "epoch": 0.14517930264300782, "grad_norm": 1102.948486328125, "learning_rate": 9.946824237646823e-06, "loss": 153.8839, "step": 17550 }, { "epoch": 0.14526202589237705, "grad_norm": 2167.79638671875, "learning_rate": 9.946618895708043e-06, "loss": 172.0367, "step": 17560 }, { "epoch": 0.14534474914174628, "grad_norm": 1476.7362060546875, "learning_rate": 9.946413160189498e-06, "loss": 138.3295, "step": 17570 }, { "epoch": 0.14542747239111553, "grad_norm": 882.1810913085938, "learning_rate": 9.946207031107562e-06, "loss": 186.2194, "step": 17580 }, { "epoch": 0.14551019564048476, "grad_norm": 2111.673095703125, "learning_rate": 9.94600050847863e-06, "loss": 170.1872, "step": 17590 }, { "epoch": 0.14559291888985398, "grad_norm": 1099.032958984375, "learning_rate": 9.945793592319137e-06, "loss": 128.6498, "step": 17600 }, { "epoch": 0.14567564213922324, "grad_norm": 1059.4005126953125, "learning_rate": 9.945586282645545e-06, "loss": 134.5357, "step": 17610 }, { "epoch": 0.14575836538859246, "grad_norm": 1566.564208984375, "learning_rate": 9.945378579474351e-06, "loss": 164.359, "step": 17620 }, { "epoch": 0.1458410886379617, "grad_norm": 782.61279296875, "learning_rate": 9.945170482822079e-06, "loss": 106.899, "step": 17630 }, { "epoch": 0.14592381188733095, "grad_norm": 1026.7816162109375, "learning_rate": 9.944961992705288e-06, "loss": 142.0462, "step": 17640 }, { "epoch": 0.14600653513670017, "grad_norm": 817.039306640625, "learning_rate": 9.944753109140564e-06, "loss": 166.4367, "step": 17650 }, { "epoch": 0.1460892583860694, "grad_norm": 856.3842163085938, "learning_rate": 9.94454383214453e-06, "loss": 131.3289, "step": 17660 }, { "epoch": 0.14617198163543865, "grad_norm": 1656.999755859375, "learning_rate": 9.944334161733835e-06, "loss": 129.1978, "step": 17670 }, { "epoch": 0.14625470488480788, "grad_norm": 1338.8382568359375, "learning_rate": 9.944124097925161e-06, "loss": 184.4288, "step": 17680 }, { "epoch": 0.1463374281341771, "grad_norm": 1011.0686645507812, "learning_rate": 9.943913640735224e-06, "loss": 127.4451, "step": 17690 }, { "epoch": 0.14642015138354633, "grad_norm": 923.1884765625, "learning_rate": 9.94370279018077e-06, "loss": 120.5529, "step": 17700 }, { "epoch": 0.1465028746329156, "grad_norm": 1001.093505859375, "learning_rate": 9.94349154627857e-06, "loss": 129.3988, "step": 17710 }, { "epoch": 0.1465855978822848, "grad_norm": 1354.6356201171875, "learning_rate": 9.943279909045438e-06, "loss": 122.9835, "step": 17720 }, { "epoch": 0.14666832113165404, "grad_norm": 1260.7392578125, "learning_rate": 9.94306787849821e-06, "loss": 101.6319, "step": 17730 }, { "epoch": 0.1467510443810233, "grad_norm": 1424.63330078125, "learning_rate": 9.942855454653755e-06, "loss": 179.1118, "step": 17740 }, { "epoch": 0.14683376763039252, "grad_norm": 1053.8809814453125, "learning_rate": 9.942642637528977e-06, "loss": 167.5939, "step": 17750 }, { "epoch": 0.14691649087976175, "grad_norm": 936.3515014648438, "learning_rate": 9.942429427140807e-06, "loss": 154.7948, "step": 17760 }, { "epoch": 0.146999214129131, "grad_norm": 876.3916015625, "learning_rate": 9.942215823506211e-06, "loss": 114.5722, "step": 17770 }, { "epoch": 0.14708193737850023, "grad_norm": 1172.0423583984375, "learning_rate": 9.942001826642184e-06, "loss": 142.9646, "step": 17780 }, { "epoch": 0.14716466062786945, "grad_norm": 1635.97802734375, "learning_rate": 9.941787436565751e-06, "loss": 150.69, "step": 17790 }, { "epoch": 0.1472473838772387, "grad_norm": 728.3792724609375, "learning_rate": 9.941572653293974e-06, "loss": 97.5937, "step": 17800 }, { "epoch": 0.14733010712660793, "grad_norm": 935.0343627929688, "learning_rate": 9.941357476843938e-06, "loss": 135.0443, "step": 17810 }, { "epoch": 0.14741283037597716, "grad_norm": 583.3887329101562, "learning_rate": 9.941141907232766e-06, "loss": 134.4311, "step": 17820 }, { "epoch": 0.14749555362534642, "grad_norm": 1191.19677734375, "learning_rate": 9.940925944477608e-06, "loss": 129.727, "step": 17830 }, { "epoch": 0.14757827687471564, "grad_norm": 1111.1417236328125, "learning_rate": 9.940709588595649e-06, "loss": 171.4274, "step": 17840 }, { "epoch": 0.14766100012408487, "grad_norm": 2006.4134521484375, "learning_rate": 9.940492839604103e-06, "loss": 152.9817, "step": 17850 }, { "epoch": 0.14774372337345412, "grad_norm": 1163.596923828125, "learning_rate": 9.940275697520216e-06, "loss": 169.9584, "step": 17860 }, { "epoch": 0.14782644662282335, "grad_norm": 1189.015869140625, "learning_rate": 9.940058162361264e-06, "loss": 152.1794, "step": 17870 }, { "epoch": 0.14790916987219258, "grad_norm": 998.8855590820312, "learning_rate": 9.939840234144556e-06, "loss": 129.5204, "step": 17880 }, { "epoch": 0.1479918931215618, "grad_norm": 442.9149475097656, "learning_rate": 9.939621912887431e-06, "loss": 106.7805, "step": 17890 }, { "epoch": 0.14807461637093106, "grad_norm": 830.00927734375, "learning_rate": 9.93940319860726e-06, "loss": 139.6457, "step": 17900 }, { "epoch": 0.14815733962030028, "grad_norm": 1069.5220947265625, "learning_rate": 9.939184091321445e-06, "loss": 129.1493, "step": 17910 }, { "epoch": 0.1482400628696695, "grad_norm": 1180.868896484375, "learning_rate": 9.938964591047421e-06, "loss": 108.2578, "step": 17920 }, { "epoch": 0.14832278611903876, "grad_norm": 1095.6793212890625, "learning_rate": 9.938744697802651e-06, "loss": 145.4649, "step": 17930 }, { "epoch": 0.148405509368408, "grad_norm": 1292.62744140625, "learning_rate": 9.938524411604631e-06, "loss": 145.161, "step": 17940 }, { "epoch": 0.14848823261777722, "grad_norm": 1319.2213134765625, "learning_rate": 9.938303732470888e-06, "loss": 129.5037, "step": 17950 }, { "epoch": 0.14857095586714647, "grad_norm": 697.8318481445312, "learning_rate": 9.938082660418981e-06, "loss": 103.5571, "step": 17960 }, { "epoch": 0.1486536791165157, "grad_norm": 784.6300659179688, "learning_rate": 9.937861195466498e-06, "loss": 133.7046, "step": 17970 }, { "epoch": 0.14873640236588492, "grad_norm": 966.1806030273438, "learning_rate": 9.937639337631064e-06, "loss": 170.2544, "step": 17980 }, { "epoch": 0.14881912561525418, "grad_norm": 862.203857421875, "learning_rate": 9.937417086930328e-06, "loss": 129.5846, "step": 17990 }, { "epoch": 0.1489018488646234, "grad_norm": 3391.59716796875, "learning_rate": 9.937194443381972e-06, "loss": 195.0929, "step": 18000 }, { "epoch": 0.14898457211399263, "grad_norm": 737.4010009765625, "learning_rate": 9.936971407003714e-06, "loss": 110.9804, "step": 18010 }, { "epoch": 0.14906729536336188, "grad_norm": 1527.3822021484375, "learning_rate": 9.936747977813299e-06, "loss": 124.5241, "step": 18020 }, { "epoch": 0.1491500186127311, "grad_norm": 1383.10986328125, "learning_rate": 9.936524155828503e-06, "loss": 138.0007, "step": 18030 }, { "epoch": 0.14923274186210034, "grad_norm": 850.4631958007812, "learning_rate": 9.936299941067137e-06, "loss": 131.9197, "step": 18040 }, { "epoch": 0.1493154651114696, "grad_norm": 736.7586059570312, "learning_rate": 9.93607533354704e-06, "loss": 116.8003, "step": 18050 }, { "epoch": 0.14939818836083882, "grad_norm": 3558.953857421875, "learning_rate": 9.935850333286081e-06, "loss": 236.4352, "step": 18060 }, { "epoch": 0.14948091161020804, "grad_norm": 2170.6923828125, "learning_rate": 9.935624940302165e-06, "loss": 162.2385, "step": 18070 }, { "epoch": 0.14956363485957727, "grad_norm": 890.1776123046875, "learning_rate": 9.93539915461322e-06, "loss": 152.4946, "step": 18080 }, { "epoch": 0.14964635810894653, "grad_norm": 1055.08447265625, "learning_rate": 9.935172976237218e-06, "loss": 179.4581, "step": 18090 }, { "epoch": 0.14972908135831575, "grad_norm": 1069.735595703125, "learning_rate": 9.934946405192152e-06, "loss": 109.1896, "step": 18100 }, { "epoch": 0.14981180460768498, "grad_norm": 936.50048828125, "learning_rate": 9.934719441496048e-06, "loss": 170.2172, "step": 18110 }, { "epoch": 0.14989452785705423, "grad_norm": 1424.594970703125, "learning_rate": 9.934492085166965e-06, "loss": 120.5943, "step": 18120 }, { "epoch": 0.14997725110642346, "grad_norm": 1259.637939453125, "learning_rate": 9.934264336222992e-06, "loss": 141.8418, "step": 18130 }, { "epoch": 0.15005997435579269, "grad_norm": 1107.58447265625, "learning_rate": 9.934036194682253e-06, "loss": 132.8073, "step": 18140 }, { "epoch": 0.15014269760516194, "grad_norm": 1031.5169677734375, "learning_rate": 9.933807660562898e-06, "loss": 122.9906, "step": 18150 }, { "epoch": 0.15022542085453117, "grad_norm": 683.3692626953125, "learning_rate": 9.933578733883109e-06, "loss": 175.373, "step": 18160 }, { "epoch": 0.1503081441039004, "grad_norm": 841.3174438476562, "learning_rate": 9.933349414661103e-06, "loss": 143.8702, "step": 18170 }, { "epoch": 0.15039086735326965, "grad_norm": 1086.541015625, "learning_rate": 9.933119702915125e-06, "loss": 149.0898, "step": 18180 }, { "epoch": 0.15047359060263887, "grad_norm": 1380.8690185546875, "learning_rate": 9.932889598663452e-06, "loss": 142.0298, "step": 18190 }, { "epoch": 0.1505563138520081, "grad_norm": 800.4336547851562, "learning_rate": 9.932659101924393e-06, "loss": 169.3204, "step": 18200 }, { "epoch": 0.15063903710137735, "grad_norm": 883.3157348632812, "learning_rate": 9.932428212716287e-06, "loss": 183.8594, "step": 18210 }, { "epoch": 0.15072176035074658, "grad_norm": 922.6904907226562, "learning_rate": 9.932196931057505e-06, "loss": 157.8369, "step": 18220 }, { "epoch": 0.1508044836001158, "grad_norm": 1918.9375, "learning_rate": 9.931965256966449e-06, "loss": 143.9471, "step": 18230 }, { "epoch": 0.15088720684948506, "grad_norm": 1153.788818359375, "learning_rate": 9.931733190461552e-06, "loss": 167.4599, "step": 18240 }, { "epoch": 0.1509699300988543, "grad_norm": 1510.7779541015625, "learning_rate": 9.931500731561279e-06, "loss": 123.1982, "step": 18250 }, { "epoch": 0.15105265334822351, "grad_norm": 939.08447265625, "learning_rate": 9.931267880284124e-06, "loss": 128.6788, "step": 18260 }, { "epoch": 0.15113537659759274, "grad_norm": 521.10693359375, "learning_rate": 9.931034636648616e-06, "loss": 110.3548, "step": 18270 }, { "epoch": 0.151218099846962, "grad_norm": 1793.3514404296875, "learning_rate": 9.930801000673314e-06, "loss": 226.4601, "step": 18280 }, { "epoch": 0.15130082309633122, "grad_norm": 1056.244384765625, "learning_rate": 9.930566972376803e-06, "loss": 137.7991, "step": 18290 }, { "epoch": 0.15138354634570045, "grad_norm": 1053.8623046875, "learning_rate": 9.930332551777709e-06, "loss": 126.393, "step": 18300 }, { "epoch": 0.1514662695950697, "grad_norm": 466.3129577636719, "learning_rate": 9.930097738894679e-06, "loss": 142.9212, "step": 18310 }, { "epoch": 0.15154899284443893, "grad_norm": 1002.7549438476562, "learning_rate": 9.929862533746398e-06, "loss": 142.7721, "step": 18320 }, { "epoch": 0.15163171609380816, "grad_norm": 758.2431030273438, "learning_rate": 9.92962693635158e-06, "loss": 138.2474, "step": 18330 }, { "epoch": 0.1517144393431774, "grad_norm": 640.2601318359375, "learning_rate": 9.929390946728972e-06, "loss": 127.6863, "step": 18340 }, { "epoch": 0.15179716259254664, "grad_norm": 974.4703979492188, "learning_rate": 9.929154564897347e-06, "loss": 127.8559, "step": 18350 }, { "epoch": 0.15187988584191586, "grad_norm": 989.0883178710938, "learning_rate": 9.928917790875519e-06, "loss": 146.4885, "step": 18360 }, { "epoch": 0.15196260909128512, "grad_norm": 926.8894653320312, "learning_rate": 9.92868062468232e-06, "loss": 140.2718, "step": 18370 }, { "epoch": 0.15204533234065434, "grad_norm": 1093.3875732421875, "learning_rate": 9.928443066336624e-06, "loss": 156.4275, "step": 18380 }, { "epoch": 0.15212805559002357, "grad_norm": 1011.3756713867188, "learning_rate": 9.92820511585733e-06, "loss": 110.9518, "step": 18390 }, { "epoch": 0.15221077883939282, "grad_norm": 963.0042114257812, "learning_rate": 9.927966773263375e-06, "loss": 126.9806, "step": 18400 }, { "epoch": 0.15229350208876205, "grad_norm": 1338.420654296875, "learning_rate": 9.92772803857372e-06, "loss": 151.1979, "step": 18410 }, { "epoch": 0.15237622533813128, "grad_norm": 2231.09326171875, "learning_rate": 9.927488911807359e-06, "loss": 126.2641, "step": 18420 }, { "epoch": 0.1524589485875005, "grad_norm": 740.568603515625, "learning_rate": 9.927249392983319e-06, "loss": 161.8315, "step": 18430 }, { "epoch": 0.15254167183686976, "grad_norm": 1194.8526611328125, "learning_rate": 9.927009482120658e-06, "loss": 147.5258, "step": 18440 }, { "epoch": 0.15262439508623898, "grad_norm": 575.281005859375, "learning_rate": 9.926769179238467e-06, "loss": 123.2295, "step": 18450 }, { "epoch": 0.1527071183356082, "grad_norm": 1439.9266357421875, "learning_rate": 9.926528484355859e-06, "loss": 131.7167, "step": 18460 }, { "epoch": 0.15278984158497746, "grad_norm": 1190.1434326171875, "learning_rate": 9.926287397491992e-06, "loss": 147.6172, "step": 18470 }, { "epoch": 0.1528725648343467, "grad_norm": 1017.2939453125, "learning_rate": 9.926045918666045e-06, "loss": 144.2414, "step": 18480 }, { "epoch": 0.15295528808371592, "grad_norm": 1303.34814453125, "learning_rate": 9.925804047897231e-06, "loss": 202.542, "step": 18490 }, { "epoch": 0.15303801133308517, "grad_norm": 1302.6015625, "learning_rate": 9.925561785204797e-06, "loss": 150.7994, "step": 18500 }, { "epoch": 0.1531207345824544, "grad_norm": 1482.454345703125, "learning_rate": 9.925319130608015e-06, "loss": 160.8186, "step": 18510 }, { "epoch": 0.15320345783182363, "grad_norm": 1089.9215087890625, "learning_rate": 9.925076084126194e-06, "loss": 140.3311, "step": 18520 }, { "epoch": 0.15328618108119288, "grad_norm": 2586.4873046875, "learning_rate": 9.924832645778674e-06, "loss": 105.1053, "step": 18530 }, { "epoch": 0.1533689043305621, "grad_norm": 986.44775390625, "learning_rate": 9.924588815584822e-06, "loss": 146.6998, "step": 18540 }, { "epoch": 0.15345162757993133, "grad_norm": 1076.886474609375, "learning_rate": 9.924344593564038e-06, "loss": 167.1004, "step": 18550 }, { "epoch": 0.1535343508293006, "grad_norm": 576.464599609375, "learning_rate": 9.924099979735754e-06, "loss": 109.8678, "step": 18560 }, { "epoch": 0.1536170740786698, "grad_norm": 722.518310546875, "learning_rate": 9.923854974119434e-06, "loss": 125.0473, "step": 18570 }, { "epoch": 0.15369979732803904, "grad_norm": 1820.7373046875, "learning_rate": 9.92360957673457e-06, "loss": 141.7229, "step": 18580 }, { "epoch": 0.1537825205774083, "grad_norm": 1114.6781005859375, "learning_rate": 9.923363787600688e-06, "loss": 141.2934, "step": 18590 }, { "epoch": 0.15386524382677752, "grad_norm": 1381.0604248046875, "learning_rate": 9.923117606737347e-06, "loss": 116.5776, "step": 18600 }, { "epoch": 0.15394796707614675, "grad_norm": 1086.9346923828125, "learning_rate": 9.92287103416413e-06, "loss": 191.1018, "step": 18610 }, { "epoch": 0.15403069032551597, "grad_norm": 1572.17529296875, "learning_rate": 9.922624069900658e-06, "loss": 155.7499, "step": 18620 }, { "epoch": 0.15411341357488523, "grad_norm": 1132.931884765625, "learning_rate": 9.922376713966581e-06, "loss": 152.9908, "step": 18630 }, { "epoch": 0.15419613682425445, "grad_norm": 585.323486328125, "learning_rate": 9.92212896638158e-06, "loss": 152.2068, "step": 18640 }, { "epoch": 0.15427886007362368, "grad_norm": 595.2325439453125, "learning_rate": 9.921880827165367e-06, "loss": 118.7037, "step": 18650 }, { "epoch": 0.15436158332299293, "grad_norm": 4941.7626953125, "learning_rate": 9.921632296337683e-06, "loss": 153.8302, "step": 18660 }, { "epoch": 0.15444430657236216, "grad_norm": 1101.675048828125, "learning_rate": 9.921383373918305e-06, "loss": 180.7743, "step": 18670 }, { "epoch": 0.1545270298217314, "grad_norm": 1755.6380615234375, "learning_rate": 9.92113405992704e-06, "loss": 180.545, "step": 18680 }, { "epoch": 0.15460975307110064, "grad_norm": 925.7059326171875, "learning_rate": 9.92088435438372e-06, "loss": 134.731, "step": 18690 }, { "epoch": 0.15469247632046987, "grad_norm": 1003.4811401367188, "learning_rate": 9.920634257308217e-06, "loss": 123.1074, "step": 18700 }, { "epoch": 0.1547751995698391, "grad_norm": 853.6227416992188, "learning_rate": 9.920383768720429e-06, "loss": 150.079, "step": 18710 }, { "epoch": 0.15485792281920835, "grad_norm": 1113.7686767578125, "learning_rate": 9.920132888640286e-06, "loss": 155.0464, "step": 18720 }, { "epoch": 0.15494064606857758, "grad_norm": 1343.3956298828125, "learning_rate": 9.91988161708775e-06, "loss": 158.199, "step": 18730 }, { "epoch": 0.1550233693179468, "grad_norm": 704.8764038085938, "learning_rate": 9.919629954082813e-06, "loss": 153.2144, "step": 18740 }, { "epoch": 0.15510609256731606, "grad_norm": 550.6301879882812, "learning_rate": 9.919377899645497e-06, "loss": 141.5231, "step": 18750 }, { "epoch": 0.15518881581668528, "grad_norm": 658.1661376953125, "learning_rate": 9.91912545379586e-06, "loss": 138.1113, "step": 18760 }, { "epoch": 0.1552715390660545, "grad_norm": 1573.263916015625, "learning_rate": 9.918872616553986e-06, "loss": 129.6509, "step": 18770 }, { "epoch": 0.15535426231542376, "grad_norm": 1778.827392578125, "learning_rate": 9.918619387939991e-06, "loss": 155.8357, "step": 18780 }, { "epoch": 0.155436985564793, "grad_norm": 3056.092041015625, "learning_rate": 9.918365767974025e-06, "loss": 187.3279, "step": 18790 }, { "epoch": 0.15551970881416222, "grad_norm": 905.0897827148438, "learning_rate": 9.91811175667627e-06, "loss": 198.9183, "step": 18800 }, { "epoch": 0.15560243206353144, "grad_norm": 854.0653076171875, "learning_rate": 9.91785735406693e-06, "loss": 111.8965, "step": 18810 }, { "epoch": 0.1556851553129007, "grad_norm": 1693.5703125, "learning_rate": 9.917602560166253e-06, "loss": 138.9856, "step": 18820 }, { "epoch": 0.15576787856226992, "grad_norm": 1688.492431640625, "learning_rate": 9.917347374994507e-06, "loss": 118.313, "step": 18830 }, { "epoch": 0.15585060181163915, "grad_norm": 1094.8780517578125, "learning_rate": 9.917091798571998e-06, "loss": 122.0171, "step": 18840 }, { "epoch": 0.1559333250610084, "grad_norm": 1383.755859375, "learning_rate": 9.916835830919062e-06, "loss": 149.7231, "step": 18850 }, { "epoch": 0.15601604831037763, "grad_norm": 4172.68603515625, "learning_rate": 9.916579472056064e-06, "loss": 164.1563, "step": 18860 }, { "epoch": 0.15609877155974686, "grad_norm": 1112.282958984375, "learning_rate": 9.916322722003402e-06, "loss": 140.6031, "step": 18870 }, { "epoch": 0.1561814948091161, "grad_norm": 1334.9544677734375, "learning_rate": 9.916065580781504e-06, "loss": 125.8786, "step": 18880 }, { "epoch": 0.15626421805848534, "grad_norm": 759.095703125, "learning_rate": 9.91580804841083e-06, "loss": 123.02, "step": 18890 }, { "epoch": 0.15634694130785456, "grad_norm": 923.3683471679688, "learning_rate": 9.915550124911866e-06, "loss": 111.5178, "step": 18900 }, { "epoch": 0.15642966455722382, "grad_norm": 1883.232177734375, "learning_rate": 9.915291810305141e-06, "loss": 153.0945, "step": 18910 }, { "epoch": 0.15651238780659305, "grad_norm": 1571.4327392578125, "learning_rate": 9.915033104611204e-06, "loss": 152.6783, "step": 18920 }, { "epoch": 0.15659511105596227, "grad_norm": 842.9326171875, "learning_rate": 9.914774007850641e-06, "loss": 154.6972, "step": 18930 }, { "epoch": 0.15667783430533153, "grad_norm": 1248.7547607421875, "learning_rate": 9.914514520044065e-06, "loss": 169.4783, "step": 18940 }, { "epoch": 0.15676055755470075, "grad_norm": 976.1325073242188, "learning_rate": 9.914254641212124e-06, "loss": 114.911, "step": 18950 }, { "epoch": 0.15684328080406998, "grad_norm": 893.714111328125, "learning_rate": 9.913994371375494e-06, "loss": 81.8798, "step": 18960 }, { "epoch": 0.1569260040534392, "grad_norm": 1223.4085693359375, "learning_rate": 9.913733710554886e-06, "loss": 138.9431, "step": 18970 }, { "epoch": 0.15700872730280846, "grad_norm": 2076.80712890625, "learning_rate": 9.913472658771034e-06, "loss": 113.3516, "step": 18980 }, { "epoch": 0.15709145055217769, "grad_norm": 1146.1357421875, "learning_rate": 9.913211216044715e-06, "loss": 162.9254, "step": 18990 }, { "epoch": 0.1571741738015469, "grad_norm": 1230.30224609375, "learning_rate": 9.912949382396728e-06, "loss": 197.952, "step": 19000 }, { "epoch": 0.15725689705091617, "grad_norm": 1353.6494140625, "learning_rate": 9.912687157847905e-06, "loss": 137.2512, "step": 19010 }, { "epoch": 0.1573396203002854, "grad_norm": 1508.077392578125, "learning_rate": 9.91242454241911e-06, "loss": 133.2853, "step": 19020 }, { "epoch": 0.15742234354965462, "grad_norm": 935.4700927734375, "learning_rate": 9.912161536131242e-06, "loss": 126.4163, "step": 19030 }, { "epoch": 0.15750506679902387, "grad_norm": 920.0881958007812, "learning_rate": 9.911898139005222e-06, "loss": 106.0859, "step": 19040 }, { "epoch": 0.1575877900483931, "grad_norm": 1098.602783203125, "learning_rate": 9.91163435106201e-06, "loss": 113.0203, "step": 19050 }, { "epoch": 0.15767051329776233, "grad_norm": 529.7808227539062, "learning_rate": 9.911370172322595e-06, "loss": 100.4977, "step": 19060 }, { "epoch": 0.15775323654713158, "grad_norm": 1122.334228515625, "learning_rate": 9.911105602807996e-06, "loss": 147.1685, "step": 19070 }, { "epoch": 0.1578359597965008, "grad_norm": 1302.458740234375, "learning_rate": 9.910840642539261e-06, "loss": 138.237, "step": 19080 }, { "epoch": 0.15791868304587003, "grad_norm": 1324.593505859375, "learning_rate": 9.910575291537476e-06, "loss": 182.2281, "step": 19090 }, { "epoch": 0.1580014062952393, "grad_norm": 746.2387084960938, "learning_rate": 9.91030954982375e-06, "loss": 111.4794, "step": 19100 }, { "epoch": 0.15808412954460851, "grad_norm": 1335.113525390625, "learning_rate": 9.910043417419228e-06, "loss": 148.9087, "step": 19110 }, { "epoch": 0.15816685279397774, "grad_norm": 688.1320190429688, "learning_rate": 9.909776894345086e-06, "loss": 141.3004, "step": 19120 }, { "epoch": 0.158249576043347, "grad_norm": 1126.445068359375, "learning_rate": 9.909509980622532e-06, "loss": 112.016, "step": 19130 }, { "epoch": 0.15833229929271622, "grad_norm": 600.5185546875, "learning_rate": 9.909242676272797e-06, "loss": 114.159, "step": 19140 }, { "epoch": 0.15841502254208545, "grad_norm": 1174.3468017578125, "learning_rate": 9.908974981317155e-06, "loss": 171.2533, "step": 19150 }, { "epoch": 0.15849774579145468, "grad_norm": 795.6885986328125, "learning_rate": 9.9087068957769e-06, "loss": 136.8247, "step": 19160 }, { "epoch": 0.15858046904082393, "grad_norm": 1241.0509033203125, "learning_rate": 9.908438419673367e-06, "loss": 137.2768, "step": 19170 }, { "epoch": 0.15866319229019316, "grad_norm": 761.6776123046875, "learning_rate": 9.908169553027916e-06, "loss": 165.4491, "step": 19180 }, { "epoch": 0.15874591553956238, "grad_norm": 1572.6136474609375, "learning_rate": 9.90790029586194e-06, "loss": 124.7587, "step": 19190 }, { "epoch": 0.15882863878893164, "grad_norm": 732.7517700195312, "learning_rate": 9.907630648196857e-06, "loss": 142.462, "step": 19200 }, { "epoch": 0.15891136203830086, "grad_norm": 957.8698120117188, "learning_rate": 9.907360610054132e-06, "loss": 145.4445, "step": 19210 }, { "epoch": 0.1589940852876701, "grad_norm": 1933.4423828125, "learning_rate": 9.907090181455241e-06, "loss": 126.4228, "step": 19220 }, { "epoch": 0.15907680853703934, "grad_norm": 1341.31591796875, "learning_rate": 9.906819362421707e-06, "loss": 127.2506, "step": 19230 }, { "epoch": 0.15915953178640857, "grad_norm": 1401.2039794921875, "learning_rate": 9.906548152975076e-06, "loss": 142.5762, "step": 19240 }, { "epoch": 0.1592422550357778, "grad_norm": 1090.92578125, "learning_rate": 9.906276553136924e-06, "loss": 133.9682, "step": 19250 }, { "epoch": 0.15932497828514705, "grad_norm": 627.4381713867188, "learning_rate": 9.906004562928865e-06, "loss": 123.456, "step": 19260 }, { "epoch": 0.15940770153451628, "grad_norm": 1303.6290283203125, "learning_rate": 9.905732182372538e-06, "loss": 176.1459, "step": 19270 }, { "epoch": 0.1594904247838855, "grad_norm": 1230.2550048828125, "learning_rate": 9.905459411489617e-06, "loss": 150.8253, "step": 19280 }, { "epoch": 0.15957314803325476, "grad_norm": 557.597900390625, "learning_rate": 9.905186250301802e-06, "loss": 128.1924, "step": 19290 }, { "epoch": 0.15965587128262398, "grad_norm": 1013.2421875, "learning_rate": 9.904912698830828e-06, "loss": 148.6797, "step": 19300 }, { "epoch": 0.1597385945319932, "grad_norm": 1238.0384521484375, "learning_rate": 9.904638757098464e-06, "loss": 143.5567, "step": 19310 }, { "epoch": 0.15982131778136247, "grad_norm": 1117.36962890625, "learning_rate": 9.9043644251265e-06, "loss": 129.1499, "step": 19320 }, { "epoch": 0.1599040410307317, "grad_norm": 1082.633544921875, "learning_rate": 9.90408970293677e-06, "loss": 99.3771, "step": 19330 }, { "epoch": 0.15998676428010092, "grad_norm": 1239.34326171875, "learning_rate": 9.903814590551127e-06, "loss": 152.1191, "step": 19340 }, { "epoch": 0.16006948752947014, "grad_norm": 1026.2008056640625, "learning_rate": 9.903539087991462e-06, "loss": 138.5603, "step": 19350 }, { "epoch": 0.1601522107788394, "grad_norm": 716.8160400390625, "learning_rate": 9.903263195279698e-06, "loss": 121.6254, "step": 19360 }, { "epoch": 0.16023493402820863, "grad_norm": 1355.804931640625, "learning_rate": 9.902986912437784e-06, "loss": 121.697, "step": 19370 }, { "epoch": 0.16031765727757785, "grad_norm": 1001.9555053710938, "learning_rate": 9.902710239487702e-06, "loss": 123.8956, "step": 19380 }, { "epoch": 0.1604003805269471, "grad_norm": 929.7130737304688, "learning_rate": 9.902433176451466e-06, "loss": 108.8211, "step": 19390 }, { "epoch": 0.16048310377631633, "grad_norm": 1794.6314697265625, "learning_rate": 9.902155723351124e-06, "loss": 119.6667, "step": 19400 }, { "epoch": 0.16056582702568556, "grad_norm": 981.8839721679688, "learning_rate": 9.901877880208747e-06, "loss": 123.9001, "step": 19410 }, { "epoch": 0.1606485502750548, "grad_norm": 1454.4476318359375, "learning_rate": 9.901599647046443e-06, "loss": 131.0193, "step": 19420 }, { "epoch": 0.16073127352442404, "grad_norm": 1284.30224609375, "learning_rate": 9.901321023886351e-06, "loss": 169.1719, "step": 19430 }, { "epoch": 0.16081399677379327, "grad_norm": 1159.77783203125, "learning_rate": 9.901042010750641e-06, "loss": 100.9739, "step": 19440 }, { "epoch": 0.16089672002316252, "grad_norm": 1844.110107421875, "learning_rate": 9.900762607661509e-06, "loss": 153.9659, "step": 19450 }, { "epoch": 0.16097944327253175, "grad_norm": 701.9683227539062, "learning_rate": 9.900482814641188e-06, "loss": 109.9286, "step": 19460 }, { "epoch": 0.16106216652190097, "grad_norm": 1962.1533203125, "learning_rate": 9.90020263171194e-06, "loss": 158.6689, "step": 19470 }, { "epoch": 0.16114488977127023, "grad_norm": 1527.3931884765625, "learning_rate": 9.899922058896058e-06, "loss": 129.6219, "step": 19480 }, { "epoch": 0.16122761302063945, "grad_norm": 935.3746337890625, "learning_rate": 9.899641096215865e-06, "loss": 187.1026, "step": 19490 }, { "epoch": 0.16131033627000868, "grad_norm": 1236.936279296875, "learning_rate": 9.899359743693715e-06, "loss": 194.122, "step": 19500 }, { "epoch": 0.16139305951937793, "grad_norm": 2321.50439453125, "learning_rate": 9.899078001351996e-06, "loss": 164.5937, "step": 19510 }, { "epoch": 0.16147578276874716, "grad_norm": 1305.2666015625, "learning_rate": 9.898795869213125e-06, "loss": 149.4349, "step": 19520 }, { "epoch": 0.1615585060181164, "grad_norm": 1175.187744140625, "learning_rate": 9.898513347299549e-06, "loss": 142.7042, "step": 19530 }, { "epoch": 0.16164122926748561, "grad_norm": 1031.960693359375, "learning_rate": 9.898230435633747e-06, "loss": 151.7943, "step": 19540 }, { "epoch": 0.16172395251685487, "grad_norm": 1225.9884033203125, "learning_rate": 9.897947134238228e-06, "loss": 162.5945, "step": 19550 }, { "epoch": 0.1618066757662241, "grad_norm": 1006.8120727539062, "learning_rate": 9.897663443135534e-06, "loss": 127.9761, "step": 19560 }, { "epoch": 0.16188939901559332, "grad_norm": 2127.416748046875, "learning_rate": 9.897379362348239e-06, "loss": 171.9894, "step": 19570 }, { "epoch": 0.16197212226496258, "grad_norm": 745.5164794921875, "learning_rate": 9.897094891898942e-06, "loss": 150.5477, "step": 19580 }, { "epoch": 0.1620548455143318, "grad_norm": 2045.43896484375, "learning_rate": 9.89681003181028e-06, "loss": 143.4611, "step": 19590 }, { "epoch": 0.16213756876370103, "grad_norm": 1801.7623291015625, "learning_rate": 9.896524782104917e-06, "loss": 136.5524, "step": 19600 }, { "epoch": 0.16222029201307028, "grad_norm": 865.27001953125, "learning_rate": 9.89623914280555e-06, "loss": 135.0138, "step": 19610 }, { "epoch": 0.1623030152624395, "grad_norm": 1108.943359375, "learning_rate": 9.895953113934904e-06, "loss": 131.2855, "step": 19620 }, { "epoch": 0.16238573851180874, "grad_norm": 1284.2874755859375, "learning_rate": 9.895666695515739e-06, "loss": 158.5307, "step": 19630 }, { "epoch": 0.162468461761178, "grad_norm": 1160.2169189453125, "learning_rate": 9.895379887570842e-06, "loss": 146.816, "step": 19640 }, { "epoch": 0.16255118501054722, "grad_norm": 1387.5469970703125, "learning_rate": 9.895092690123036e-06, "loss": 130.8016, "step": 19650 }, { "epoch": 0.16263390825991644, "grad_norm": 1327.9295654296875, "learning_rate": 9.894805103195168e-06, "loss": 131.4063, "step": 19660 }, { "epoch": 0.1627166315092857, "grad_norm": 1514.6529541015625, "learning_rate": 9.894517126810122e-06, "loss": 209.5621, "step": 19670 }, { "epoch": 0.16279935475865492, "grad_norm": 2022.22021484375, "learning_rate": 9.894228760990811e-06, "loss": 152.1554, "step": 19680 }, { "epoch": 0.16288207800802415, "grad_norm": 1740.86767578125, "learning_rate": 9.893940005760181e-06, "loss": 154.0035, "step": 19690 }, { "epoch": 0.16296480125739338, "grad_norm": 1580.342529296875, "learning_rate": 9.893650861141204e-06, "loss": 157.6928, "step": 19700 }, { "epoch": 0.16304752450676263, "grad_norm": 1006.531494140625, "learning_rate": 9.893361327156887e-06, "loss": 127.0846, "step": 19710 }, { "epoch": 0.16313024775613186, "grad_norm": 1319.7847900390625, "learning_rate": 9.893071403830265e-06, "loss": 136.8425, "step": 19720 }, { "epoch": 0.16321297100550108, "grad_norm": 830.5723876953125, "learning_rate": 9.892781091184409e-06, "loss": 136.2878, "step": 19730 }, { "epoch": 0.16329569425487034, "grad_norm": 759.5004272460938, "learning_rate": 9.892490389242417e-06, "loss": 120.3061, "step": 19740 }, { "epoch": 0.16337841750423956, "grad_norm": 2213.310546875, "learning_rate": 9.892199298027416e-06, "loss": 143.1016, "step": 19750 }, { "epoch": 0.1634611407536088, "grad_norm": 1020.21337890625, "learning_rate": 9.891907817562572e-06, "loss": 116.1548, "step": 19760 }, { "epoch": 0.16354386400297805, "grad_norm": 810.1748657226562, "learning_rate": 9.891615947871072e-06, "loss": 141.0581, "step": 19770 }, { "epoch": 0.16362658725234727, "grad_norm": 810.9425048828125, "learning_rate": 9.89132368897614e-06, "loss": 130.3051, "step": 19780 }, { "epoch": 0.1637093105017165, "grad_norm": 1321.05908203125, "learning_rate": 9.891031040901031e-06, "loss": 154.2215, "step": 19790 }, { "epoch": 0.16379203375108575, "grad_norm": 1216.8099365234375, "learning_rate": 9.890738003669029e-06, "loss": 164.4314, "step": 19800 }, { "epoch": 0.16387475700045498, "grad_norm": 2961.08447265625, "learning_rate": 9.890444577303448e-06, "loss": 184.4128, "step": 19810 }, { "epoch": 0.1639574802498242, "grad_norm": 702.2813110351562, "learning_rate": 9.890150761827639e-06, "loss": 118.5094, "step": 19820 }, { "epoch": 0.16404020349919346, "grad_norm": 1224.574951171875, "learning_rate": 9.889856557264975e-06, "loss": 164.4189, "step": 19830 }, { "epoch": 0.1641229267485627, "grad_norm": 1510.7064208984375, "learning_rate": 9.889561963638866e-06, "loss": 168.8556, "step": 19840 }, { "epoch": 0.1642056499979319, "grad_norm": 1308.8349609375, "learning_rate": 9.889266980972752e-06, "loss": 157.53, "step": 19850 }, { "epoch": 0.16428837324730117, "grad_norm": 554.7841796875, "learning_rate": 9.888971609290103e-06, "loss": 123.5679, "step": 19860 }, { "epoch": 0.1643710964966704, "grad_norm": 1066.4405517578125, "learning_rate": 9.88867584861442e-06, "loss": 115.5183, "step": 19870 }, { "epoch": 0.16445381974603962, "grad_norm": 823.9727172851562, "learning_rate": 9.888379698969236e-06, "loss": 127.0505, "step": 19880 }, { "epoch": 0.16453654299540885, "grad_norm": 2073.93017578125, "learning_rate": 9.888083160378114e-06, "loss": 142.6533, "step": 19890 }, { "epoch": 0.1646192662447781, "grad_norm": 2434.051513671875, "learning_rate": 9.887786232864648e-06, "loss": 147.1622, "step": 19900 }, { "epoch": 0.16470198949414733, "grad_norm": 724.6605224609375, "learning_rate": 9.887488916452463e-06, "loss": 121.8898, "step": 19910 }, { "epoch": 0.16478471274351655, "grad_norm": 1281.83203125, "learning_rate": 9.887191211165217e-06, "loss": 151.6535, "step": 19920 }, { "epoch": 0.1648674359928858, "grad_norm": 1131.1641845703125, "learning_rate": 9.886893117026593e-06, "loss": 149.5577, "step": 19930 }, { "epoch": 0.16495015924225503, "grad_norm": 1077.4385986328125, "learning_rate": 9.886594634060314e-06, "loss": 203.1148, "step": 19940 }, { "epoch": 0.16503288249162426, "grad_norm": 1563.59228515625, "learning_rate": 9.886295762290125e-06, "loss": 156.8315, "step": 19950 }, { "epoch": 0.16511560574099352, "grad_norm": 728.146240234375, "learning_rate": 9.885996501739808e-06, "loss": 123.6347, "step": 19960 }, { "epoch": 0.16519832899036274, "grad_norm": 1174.09521484375, "learning_rate": 9.885696852433174e-06, "loss": 171.4022, "step": 19970 }, { "epoch": 0.16528105223973197, "grad_norm": 2437.55908203125, "learning_rate": 9.885396814394062e-06, "loss": 166.2973, "step": 19980 }, { "epoch": 0.16536377548910122, "grad_norm": 756.368896484375, "learning_rate": 9.885096387646346e-06, "loss": 102.5183, "step": 19990 }, { "epoch": 0.16544649873847045, "grad_norm": 1208.659423828125, "learning_rate": 9.88479557221393e-06, "loss": 146.3919, "step": 20000 }, { "epoch": 0.16552922198783968, "grad_norm": 793.578857421875, "learning_rate": 9.88449436812075e-06, "loss": 151.3374, "step": 20010 }, { "epoch": 0.16561194523720893, "grad_norm": 3023.392333984375, "learning_rate": 9.88419277539077e-06, "loss": 147.2389, "step": 20020 }, { "epoch": 0.16569466848657816, "grad_norm": 1020.14404296875, "learning_rate": 9.883890794047985e-06, "loss": 133.7473, "step": 20030 }, { "epoch": 0.16577739173594738, "grad_norm": 925.8684692382812, "learning_rate": 9.883588424116424e-06, "loss": 145.2095, "step": 20040 }, { "epoch": 0.16586011498531664, "grad_norm": 1657.950927734375, "learning_rate": 9.883285665620145e-06, "loss": 131.4692, "step": 20050 }, { "epoch": 0.16594283823468586, "grad_norm": 1638.5106201171875, "learning_rate": 9.882982518583238e-06, "loss": 120.6384, "step": 20060 }, { "epoch": 0.1660255614840551, "grad_norm": 994.8275146484375, "learning_rate": 9.882678983029819e-06, "loss": 191.7884, "step": 20070 }, { "epoch": 0.16610828473342432, "grad_norm": 1858.4609375, "learning_rate": 9.882375058984044e-06, "loss": 145.8128, "step": 20080 }, { "epoch": 0.16619100798279357, "grad_norm": 640.3125, "learning_rate": 9.882070746470092e-06, "loss": 113.2083, "step": 20090 }, { "epoch": 0.1662737312321628, "grad_norm": 1469.511474609375, "learning_rate": 9.881766045512176e-06, "loss": 189.0106, "step": 20100 }, { "epoch": 0.16635645448153202, "grad_norm": 740.4965209960938, "learning_rate": 9.88146095613454e-06, "loss": 130.8047, "step": 20110 }, { "epoch": 0.16643917773090128, "grad_norm": 683.9896240234375, "learning_rate": 9.881155478361459e-06, "loss": 175.5372, "step": 20120 }, { "epoch": 0.1665219009802705, "grad_norm": 938.4227905273438, "learning_rate": 9.880849612217238e-06, "loss": 108.5235, "step": 20130 }, { "epoch": 0.16660462422963973, "grad_norm": 841.4732666015625, "learning_rate": 9.880543357726214e-06, "loss": 142.9208, "step": 20140 }, { "epoch": 0.16668734747900898, "grad_norm": 764.7952880859375, "learning_rate": 9.880236714912754e-06, "loss": 136.3933, "step": 20150 }, { "epoch": 0.1667700707283782, "grad_norm": 1391.5673828125, "learning_rate": 9.879929683801254e-06, "loss": 138.4007, "step": 20160 }, { "epoch": 0.16685279397774744, "grad_norm": 1550.94873046875, "learning_rate": 9.879622264416147e-06, "loss": 147.9795, "step": 20170 }, { "epoch": 0.1669355172271167, "grad_norm": 879.2140502929688, "learning_rate": 9.87931445678189e-06, "loss": 155.5872, "step": 20180 }, { "epoch": 0.16701824047648592, "grad_norm": 584.1538696289062, "learning_rate": 9.879006260922975e-06, "loss": 98.7441, "step": 20190 }, { "epoch": 0.16710096372585515, "grad_norm": 719.3741455078125, "learning_rate": 9.878697676863922e-06, "loss": 126.3837, "step": 20200 }, { "epoch": 0.1671836869752244, "grad_norm": 1768.941162109375, "learning_rate": 9.878388704629286e-06, "loss": 181.439, "step": 20210 }, { "epoch": 0.16726641022459363, "grad_norm": 1062.1995849609375, "learning_rate": 9.87807934424365e-06, "loss": 155.1683, "step": 20220 }, { "epoch": 0.16734913347396285, "grad_norm": 1074.44482421875, "learning_rate": 9.877769595731629e-06, "loss": 149.4426, "step": 20230 }, { "epoch": 0.16743185672333208, "grad_norm": 766.1312866210938, "learning_rate": 9.877459459117864e-06, "loss": 133.7859, "step": 20240 }, { "epoch": 0.16751457997270133, "grad_norm": 1180.6207275390625, "learning_rate": 9.877148934427037e-06, "loss": 185.559, "step": 20250 }, { "epoch": 0.16759730322207056, "grad_norm": 936.2619018554688, "learning_rate": 9.87683802168385e-06, "loss": 153.8027, "step": 20260 }, { "epoch": 0.16768002647143979, "grad_norm": 1289.442626953125, "learning_rate": 9.876526720913045e-06, "loss": 146.8949, "step": 20270 }, { "epoch": 0.16776274972080904, "grad_norm": 1198.4373779296875, "learning_rate": 9.87621503213939e-06, "loss": 101.3234, "step": 20280 }, { "epoch": 0.16784547297017827, "grad_norm": 1139.7901611328125, "learning_rate": 9.875902955387682e-06, "loss": 105.7266, "step": 20290 }, { "epoch": 0.1679281962195475, "grad_norm": 885.1135864257812, "learning_rate": 9.875590490682754e-06, "loss": 139.6578, "step": 20300 }, { "epoch": 0.16801091946891675, "grad_norm": 1269.400146484375, "learning_rate": 9.875277638049466e-06, "loss": 148.561, "step": 20310 }, { "epoch": 0.16809364271828597, "grad_norm": 1671.2281494140625, "learning_rate": 9.87496439751271e-06, "loss": 157.8267, "step": 20320 }, { "epoch": 0.1681763659676552, "grad_norm": 913.9154663085938, "learning_rate": 9.87465076909741e-06, "loss": 141.2827, "step": 20330 }, { "epoch": 0.16825908921702445, "grad_norm": 1082.798583984375, "learning_rate": 9.874336752828523e-06, "loss": 150.5321, "step": 20340 }, { "epoch": 0.16834181246639368, "grad_norm": 967.8886108398438, "learning_rate": 9.87402234873103e-06, "loss": 142.4086, "step": 20350 }, { "epoch": 0.1684245357157629, "grad_norm": 1056.4305419921875, "learning_rate": 9.873707556829945e-06, "loss": 97.3924, "step": 20360 }, { "epoch": 0.16850725896513216, "grad_norm": 837.5071411132812, "learning_rate": 9.873392377150318e-06, "loss": 123.7359, "step": 20370 }, { "epoch": 0.1685899822145014, "grad_norm": 874.9818115234375, "learning_rate": 9.873076809717226e-06, "loss": 109.1823, "step": 20380 }, { "epoch": 0.16867270546387061, "grad_norm": 727.0296020507812, "learning_rate": 9.872760854555776e-06, "loss": 143.6749, "step": 20390 }, { "epoch": 0.16875542871323987, "grad_norm": 1192.26904296875, "learning_rate": 9.872444511691108e-06, "loss": 128.2298, "step": 20400 }, { "epoch": 0.1688381519626091, "grad_norm": 765.3291015625, "learning_rate": 9.872127781148392e-06, "loss": 140.1519, "step": 20410 }, { "epoch": 0.16892087521197832, "grad_norm": 1048.41064453125, "learning_rate": 9.871810662952828e-06, "loss": 153.647, "step": 20420 }, { "epoch": 0.16900359846134755, "grad_norm": 1050.804931640625, "learning_rate": 9.87149315712965e-06, "loss": 160.4528, "step": 20430 }, { "epoch": 0.1690863217107168, "grad_norm": 1345.5584716796875, "learning_rate": 9.871175263704116e-06, "loss": 119.833, "step": 20440 }, { "epoch": 0.16916904496008603, "grad_norm": 1076.057861328125, "learning_rate": 9.870856982701522e-06, "loss": 123.713, "step": 20450 }, { "epoch": 0.16925176820945526, "grad_norm": 1091.8779296875, "learning_rate": 9.870538314147194e-06, "loss": 157.7124, "step": 20460 }, { "epoch": 0.1693344914588245, "grad_norm": 1105.4384765625, "learning_rate": 9.870219258066485e-06, "loss": 165.7277, "step": 20470 }, { "epoch": 0.16941721470819374, "grad_norm": 1101.884033203125, "learning_rate": 9.86989981448478e-06, "loss": 93.6696, "step": 20480 }, { "epoch": 0.16949993795756296, "grad_norm": 1253.4547119140625, "learning_rate": 9.869579983427497e-06, "loss": 156.5219, "step": 20490 }, { "epoch": 0.16958266120693222, "grad_norm": 571.4030151367188, "learning_rate": 9.869259764920081e-06, "loss": 108.441, "step": 20500 }, { "epoch": 0.16966538445630144, "grad_norm": 1125.4053955078125, "learning_rate": 9.868939158988016e-06, "loss": 111.0379, "step": 20510 }, { "epoch": 0.16974810770567067, "grad_norm": 1643.4638671875, "learning_rate": 9.868618165656805e-06, "loss": 174.5399, "step": 20520 }, { "epoch": 0.16983083095503992, "grad_norm": 1219.7093505859375, "learning_rate": 9.868296784951992e-06, "loss": 143.0416, "step": 20530 }, { "epoch": 0.16991355420440915, "grad_norm": 716.170654296875, "learning_rate": 9.867975016899145e-06, "loss": 129.3315, "step": 20540 }, { "epoch": 0.16999627745377838, "grad_norm": 722.2046508789062, "learning_rate": 9.867652861523866e-06, "loss": 110.3743, "step": 20550 }, { "epoch": 0.17007900070314763, "grad_norm": 8400.3681640625, "learning_rate": 9.86733031885179e-06, "loss": 293.1446, "step": 20560 }, { "epoch": 0.17016172395251686, "grad_norm": 1231.3631591796875, "learning_rate": 9.867007388908579e-06, "loss": 158.7573, "step": 20570 }, { "epoch": 0.17024444720188608, "grad_norm": 813.4060668945312, "learning_rate": 9.866684071719926e-06, "loss": 114.0276, "step": 20580 }, { "epoch": 0.17032717045125534, "grad_norm": 691.1693115234375, "learning_rate": 9.866360367311557e-06, "loss": 124.2069, "step": 20590 }, { "epoch": 0.17040989370062457, "grad_norm": 1228.2000732421875, "learning_rate": 9.866036275709226e-06, "loss": 113.3982, "step": 20600 }, { "epoch": 0.1704926169499938, "grad_norm": 1183.33935546875, "learning_rate": 9.86571179693872e-06, "loss": 124.6813, "step": 20610 }, { "epoch": 0.17057534019936302, "grad_norm": 1132.4244384765625, "learning_rate": 9.865386931025858e-06, "loss": 109.3566, "step": 20620 }, { "epoch": 0.17065806344873227, "grad_norm": 536.39453125, "learning_rate": 9.865061677996487e-06, "loss": 94.1924, "step": 20630 }, { "epoch": 0.1707407866981015, "grad_norm": 1061.803955078125, "learning_rate": 9.864736037876487e-06, "loss": 96.9849, "step": 20640 }, { "epoch": 0.17082350994747073, "grad_norm": 1086.843505859375, "learning_rate": 9.864410010691766e-06, "loss": 158.8272, "step": 20650 }, { "epoch": 0.17090623319683998, "grad_norm": 1095.378662109375, "learning_rate": 9.864083596468263e-06, "loss": 165.8036, "step": 20660 }, { "epoch": 0.1709889564462092, "grad_norm": 773.133544921875, "learning_rate": 9.863756795231953e-06, "loss": 107.4877, "step": 20670 }, { "epoch": 0.17107167969557843, "grad_norm": 922.110107421875, "learning_rate": 9.863429607008837e-06, "loss": 152.2869, "step": 20680 }, { "epoch": 0.1711544029449477, "grad_norm": 828.44677734375, "learning_rate": 9.863102031824946e-06, "loss": 103.4225, "step": 20690 }, { "epoch": 0.1712371261943169, "grad_norm": 1600.447021484375, "learning_rate": 9.862774069706346e-06, "loss": 116.9802, "step": 20700 }, { "epoch": 0.17131984944368614, "grad_norm": 1819.4007568359375, "learning_rate": 9.86244572067913e-06, "loss": 193.74, "step": 20710 }, { "epoch": 0.1714025726930554, "grad_norm": 1109.385009765625, "learning_rate": 9.862116984769424e-06, "loss": 155.2172, "step": 20720 }, { "epoch": 0.17148529594242462, "grad_norm": 1088.8150634765625, "learning_rate": 9.861787862003384e-06, "loss": 102.3083, "step": 20730 }, { "epoch": 0.17156801919179385, "grad_norm": 863.4269409179688, "learning_rate": 9.861458352407196e-06, "loss": 163.3116, "step": 20740 }, { "epoch": 0.1716507424411631, "grad_norm": 985.338623046875, "learning_rate": 9.861128456007076e-06, "loss": 128.2076, "step": 20750 }, { "epoch": 0.17173346569053233, "grad_norm": 2616.19189453125, "learning_rate": 9.860798172829277e-06, "loss": 128.8503, "step": 20760 }, { "epoch": 0.17181618893990155, "grad_norm": 821.7667846679688, "learning_rate": 9.860467502900076e-06, "loss": 139.1303, "step": 20770 }, { "epoch": 0.1718989121892708, "grad_norm": 1093.295654296875, "learning_rate": 9.860136446245779e-06, "loss": 158.2073, "step": 20780 }, { "epoch": 0.17198163543864003, "grad_norm": 2161.525390625, "learning_rate": 9.859805002892733e-06, "loss": 159.4854, "step": 20790 }, { "epoch": 0.17206435868800926, "grad_norm": 936.6251831054688, "learning_rate": 9.859473172867304e-06, "loss": 150.7438, "step": 20800 }, { "epoch": 0.1721470819373785, "grad_norm": 735.8521118164062, "learning_rate": 9.859140956195898e-06, "loss": 166.0925, "step": 20810 }, { "epoch": 0.17222980518674774, "grad_norm": 1382.367431640625, "learning_rate": 9.858808352904946e-06, "loss": 163.85, "step": 20820 }, { "epoch": 0.17231252843611697, "grad_norm": 1131.4239501953125, "learning_rate": 9.858475363020913e-06, "loss": 138.0052, "step": 20830 }, { "epoch": 0.1723952516854862, "grad_norm": 1062.67431640625, "learning_rate": 9.858141986570294e-06, "loss": 127.1865, "step": 20840 }, { "epoch": 0.17247797493485545, "grad_norm": 790.3716430664062, "learning_rate": 9.85780822357961e-06, "loss": 99.19, "step": 20850 }, { "epoch": 0.17256069818422468, "grad_norm": 1042.801025390625, "learning_rate": 9.857474074075422e-06, "loss": 152.4688, "step": 20860 }, { "epoch": 0.1726434214335939, "grad_norm": 872.380126953125, "learning_rate": 9.857139538084313e-06, "loss": 180.6786, "step": 20870 }, { "epoch": 0.17272614468296316, "grad_norm": 873.7167358398438, "learning_rate": 9.856804615632904e-06, "loss": 153.8926, "step": 20880 }, { "epoch": 0.17280886793233238, "grad_norm": 1261.3304443359375, "learning_rate": 9.85646930674784e-06, "loss": 138.4761, "step": 20890 }, { "epoch": 0.1728915911817016, "grad_norm": 1224.0684814453125, "learning_rate": 9.856133611455802e-06, "loss": 116.0446, "step": 20900 }, { "epoch": 0.17297431443107086, "grad_norm": 1034.66552734375, "learning_rate": 9.855797529783499e-06, "loss": 143.5475, "step": 20910 }, { "epoch": 0.1730570376804401, "grad_norm": 1428.3978271484375, "learning_rate": 9.855461061757673e-06, "loss": 202.7229, "step": 20920 }, { "epoch": 0.17313976092980932, "grad_norm": 1692.571533203125, "learning_rate": 9.855124207405093e-06, "loss": 129.3957, "step": 20930 }, { "epoch": 0.17322248417917857, "grad_norm": 901.4539184570312, "learning_rate": 9.854786966752561e-06, "loss": 114.9998, "step": 20940 }, { "epoch": 0.1733052074285478, "grad_norm": 1734.4339599609375, "learning_rate": 9.854449339826912e-06, "loss": 126.6563, "step": 20950 }, { "epoch": 0.17338793067791702, "grad_norm": 936.6329956054688, "learning_rate": 9.854111326655006e-06, "loss": 148.8187, "step": 20960 }, { "epoch": 0.17347065392728625, "grad_norm": 792.8075561523438, "learning_rate": 9.85377292726374e-06, "loss": 140.2986, "step": 20970 }, { "epoch": 0.1735533771766555, "grad_norm": 852.9043579101562, "learning_rate": 9.85343414168004e-06, "loss": 142.9736, "step": 20980 }, { "epoch": 0.17363610042602473, "grad_norm": 615.2127685546875, "learning_rate": 9.853094969930857e-06, "loss": 131.7546, "step": 20990 }, { "epoch": 0.17371882367539396, "grad_norm": 1290.294921875, "learning_rate": 9.85275541204318e-06, "loss": 157.3864, "step": 21000 }, { "epoch": 0.1738015469247632, "grad_norm": 428.7129821777344, "learning_rate": 9.852415468044027e-06, "loss": 117.3043, "step": 21010 }, { "epoch": 0.17388427017413244, "grad_norm": 1278.5567626953125, "learning_rate": 9.852075137960446e-06, "loss": 136.686, "step": 21020 }, { "epoch": 0.17396699342350166, "grad_norm": 1619.6778564453125, "learning_rate": 9.851734421819511e-06, "loss": 136.3727, "step": 21030 }, { "epoch": 0.17404971667287092, "grad_norm": 1088.0845947265625, "learning_rate": 9.851393319648338e-06, "loss": 114.8988, "step": 21040 }, { "epoch": 0.17413243992224015, "grad_norm": 738.9354858398438, "learning_rate": 9.851051831474062e-06, "loss": 138.9047, "step": 21050 }, { "epoch": 0.17421516317160937, "grad_norm": 926.2405395507812, "learning_rate": 9.850709957323855e-06, "loss": 127.3761, "step": 21060 }, { "epoch": 0.17429788642097863, "grad_norm": 911.6777954101562, "learning_rate": 9.85036769722492e-06, "loss": 163.0244, "step": 21070 }, { "epoch": 0.17438060967034785, "grad_norm": 868.7709350585938, "learning_rate": 9.850025051204484e-06, "loss": 139.7337, "step": 21080 }, { "epoch": 0.17446333291971708, "grad_norm": 810.2794189453125, "learning_rate": 9.849682019289816e-06, "loss": 129.7191, "step": 21090 }, { "epoch": 0.17454605616908633, "grad_norm": 862.4880981445312, "learning_rate": 9.849338601508204e-06, "loss": 110.1159, "step": 21100 }, { "epoch": 0.17462877941845556, "grad_norm": 1751.4161376953125, "learning_rate": 9.848994797886978e-06, "loss": 162.2478, "step": 21110 }, { "epoch": 0.1747115026678248, "grad_norm": 961.6451416015625, "learning_rate": 9.84865060845349e-06, "loss": 151.0574, "step": 21120 }, { "epoch": 0.17479422591719404, "grad_norm": 984.0369873046875, "learning_rate": 9.848306033235123e-06, "loss": 114.3529, "step": 21130 }, { "epoch": 0.17487694916656327, "grad_norm": 1510.6654052734375, "learning_rate": 9.847961072259298e-06, "loss": 142.4745, "step": 21140 }, { "epoch": 0.1749596724159325, "grad_norm": 578.0482177734375, "learning_rate": 9.847615725553457e-06, "loss": 149.1546, "step": 21150 }, { "epoch": 0.17504239566530172, "grad_norm": 2596.45654296875, "learning_rate": 9.847269993145082e-06, "loss": 140.9354, "step": 21160 }, { "epoch": 0.17512511891467097, "grad_norm": 1056.8643798828125, "learning_rate": 9.84692387506168e-06, "loss": 138.6333, "step": 21170 }, { "epoch": 0.1752078421640402, "grad_norm": 543.871826171875, "learning_rate": 9.846577371330788e-06, "loss": 100.0174, "step": 21180 }, { "epoch": 0.17529056541340943, "grad_norm": 505.1315002441406, "learning_rate": 9.846230481979978e-06, "loss": 126.8892, "step": 21190 }, { "epoch": 0.17537328866277868, "grad_norm": 1152.1531982421875, "learning_rate": 9.84588320703685e-06, "loss": 137.2241, "step": 21200 }, { "epoch": 0.1754560119121479, "grad_norm": 2514.837646484375, "learning_rate": 9.845535546529036e-06, "loss": 150.1807, "step": 21210 }, { "epoch": 0.17553873516151713, "grad_norm": 1177.4676513671875, "learning_rate": 9.845187500484194e-06, "loss": 136.4538, "step": 21220 }, { "epoch": 0.1756214584108864, "grad_norm": 1041.1258544921875, "learning_rate": 9.844839068930021e-06, "loss": 131.5591, "step": 21230 }, { "epoch": 0.17570418166025562, "grad_norm": 907.1217651367188, "learning_rate": 9.844490251894237e-06, "loss": 129.923, "step": 21240 }, { "epoch": 0.17578690490962484, "grad_norm": 917.8026123046875, "learning_rate": 9.844141049404598e-06, "loss": 112.846, "step": 21250 }, { "epoch": 0.1758696281589941, "grad_norm": 1224.1368408203125, "learning_rate": 9.843791461488887e-06, "loss": 142.4482, "step": 21260 }, { "epoch": 0.17595235140836332, "grad_norm": 1229.176513671875, "learning_rate": 9.843441488174918e-06, "loss": 103.1861, "step": 21270 }, { "epoch": 0.17603507465773255, "grad_norm": 850.4046630859375, "learning_rate": 9.843091129490539e-06, "loss": 127.6695, "step": 21280 }, { "epoch": 0.1761177979071018, "grad_norm": 842.6237182617188, "learning_rate": 9.842740385463628e-06, "loss": 150.564, "step": 21290 }, { "epoch": 0.17620052115647103, "grad_norm": 1639.1712646484375, "learning_rate": 9.842389256122086e-06, "loss": 154.4747, "step": 21300 }, { "epoch": 0.17628324440584026, "grad_norm": 1027.3468017578125, "learning_rate": 9.842037741493856e-06, "loss": 138.2844, "step": 21310 }, { "epoch": 0.1763659676552095, "grad_norm": 1297.7607421875, "learning_rate": 9.841685841606905e-06, "loss": 171.9979, "step": 21320 }, { "epoch": 0.17644869090457874, "grad_norm": 650.8685302734375, "learning_rate": 9.841333556489232e-06, "loss": 138.841, "step": 21330 }, { "epoch": 0.17653141415394796, "grad_norm": 1357.2320556640625, "learning_rate": 9.840980886168866e-06, "loss": 151.9759, "step": 21340 }, { "epoch": 0.1766141374033172, "grad_norm": 938.8858032226562, "learning_rate": 9.840627830673867e-06, "loss": 141.0563, "step": 21350 }, { "epoch": 0.17669686065268644, "grad_norm": 1711.847412109375, "learning_rate": 9.84027439003233e-06, "loss": 172.4341, "step": 21360 }, { "epoch": 0.17677958390205567, "grad_norm": 854.195068359375, "learning_rate": 9.839920564272372e-06, "loss": 137.6896, "step": 21370 }, { "epoch": 0.1768623071514249, "grad_norm": 1087.6898193359375, "learning_rate": 9.839566353422148e-06, "loss": 117.6248, "step": 21380 }, { "epoch": 0.17694503040079415, "grad_norm": 2788.3251953125, "learning_rate": 9.839211757509838e-06, "loss": 144.8487, "step": 21390 }, { "epoch": 0.17702775365016338, "grad_norm": 1420.71875, "learning_rate": 9.83885677656366e-06, "loss": 122.336, "step": 21400 }, { "epoch": 0.1771104768995326, "grad_norm": 761.9530029296875, "learning_rate": 9.838501410611852e-06, "loss": 132.9662, "step": 21410 }, { "epoch": 0.17719320014890186, "grad_norm": 2090.32666015625, "learning_rate": 9.838145659682695e-06, "loss": 183.7629, "step": 21420 }, { "epoch": 0.17727592339827108, "grad_norm": 3058.696044921875, "learning_rate": 9.837789523804491e-06, "loss": 142.1503, "step": 21430 }, { "epoch": 0.1773586466476403, "grad_norm": 1059.2083740234375, "learning_rate": 9.837433003005578e-06, "loss": 137.1923, "step": 21440 }, { "epoch": 0.17744136989700957, "grad_norm": 1151.251708984375, "learning_rate": 9.83707609731432e-06, "loss": 89.3938, "step": 21450 }, { "epoch": 0.1775240931463788, "grad_norm": 749.4866333007812, "learning_rate": 9.836718806759119e-06, "loss": 145.9112, "step": 21460 }, { "epoch": 0.17760681639574802, "grad_norm": 1220.1741943359375, "learning_rate": 9.836361131368398e-06, "loss": 179.5797, "step": 21470 }, { "epoch": 0.17768953964511727, "grad_norm": 446.74859619140625, "learning_rate": 9.836003071170617e-06, "loss": 156.5806, "step": 21480 }, { "epoch": 0.1777722628944865, "grad_norm": 872.1563720703125, "learning_rate": 9.835644626194268e-06, "loss": 141.7276, "step": 21490 }, { "epoch": 0.17785498614385573, "grad_norm": 1689.736328125, "learning_rate": 9.835285796467867e-06, "loss": 133.7971, "step": 21500 }, { "epoch": 0.17793770939322495, "grad_norm": 1551.1575927734375, "learning_rate": 9.834926582019968e-06, "loss": 156.3386, "step": 21510 }, { "epoch": 0.1780204326425942, "grad_norm": 1045.95849609375, "learning_rate": 9.834566982879149e-06, "loss": 118.8335, "step": 21520 }, { "epoch": 0.17810315589196343, "grad_norm": 1034.3304443359375, "learning_rate": 9.83420699907402e-06, "loss": 136.8709, "step": 21530 }, { "epoch": 0.17818587914133266, "grad_norm": 870.5877075195312, "learning_rate": 9.83384663063323e-06, "loss": 125.3036, "step": 21540 }, { "epoch": 0.1782686023907019, "grad_norm": 3151.6611328125, "learning_rate": 9.833485877585447e-06, "loss": 230.3187, "step": 21550 }, { "epoch": 0.17835132564007114, "grad_norm": 1180.687255859375, "learning_rate": 9.833124739959375e-06, "loss": 128.1897, "step": 21560 }, { "epoch": 0.17843404888944037, "grad_norm": 1331.4376220703125, "learning_rate": 9.83276321778375e-06, "loss": 161.2605, "step": 21570 }, { "epoch": 0.17851677213880962, "grad_norm": 347.8481750488281, "learning_rate": 9.832401311087334e-06, "loss": 137.9425, "step": 21580 }, { "epoch": 0.17859949538817885, "grad_norm": 1399.0616455078125, "learning_rate": 9.832039019898922e-06, "loss": 141.6175, "step": 21590 }, { "epoch": 0.17868221863754807, "grad_norm": 1321.5462646484375, "learning_rate": 9.831676344247343e-06, "loss": 145.0367, "step": 21600 }, { "epoch": 0.17876494188691733, "grad_norm": 2181.245361328125, "learning_rate": 9.831313284161452e-06, "loss": 127.1829, "step": 21610 }, { "epoch": 0.17884766513628655, "grad_norm": 980.81201171875, "learning_rate": 9.830949839670134e-06, "loss": 160.9928, "step": 21620 }, { "epoch": 0.17893038838565578, "grad_norm": 1042.4224853515625, "learning_rate": 9.83058601080231e-06, "loss": 143.5391, "step": 21630 }, { "epoch": 0.17901311163502504, "grad_norm": 1248.8353271484375, "learning_rate": 9.830221797586925e-06, "loss": 125.7723, "step": 21640 }, { "epoch": 0.17909583488439426, "grad_norm": 765.4020385742188, "learning_rate": 9.829857200052961e-06, "loss": 145.4247, "step": 21650 }, { "epoch": 0.1791785581337635, "grad_norm": 1584.0721435546875, "learning_rate": 9.829492218229426e-06, "loss": 129.5774, "step": 21660 }, { "epoch": 0.17926128138313274, "grad_norm": 640.2371215820312, "learning_rate": 9.829126852145357e-06, "loss": 175.8682, "step": 21670 }, { "epoch": 0.17934400463250197, "grad_norm": 879.1619262695312, "learning_rate": 9.82876110182983e-06, "loss": 148.2943, "step": 21680 }, { "epoch": 0.1794267278818712, "grad_norm": 1808.1944580078125, "learning_rate": 9.82839496731194e-06, "loss": 161.7926, "step": 21690 }, { "epoch": 0.17950945113124042, "grad_norm": 1331.4281005859375, "learning_rate": 9.828028448620824e-06, "loss": 144.7912, "step": 21700 }, { "epoch": 0.17959217438060968, "grad_norm": 916.88134765625, "learning_rate": 9.827661545785641e-06, "loss": 111.8015, "step": 21710 }, { "epoch": 0.1796748976299789, "grad_norm": 945.744140625, "learning_rate": 9.827294258835584e-06, "loss": 135.7884, "step": 21720 }, { "epoch": 0.17975762087934813, "grad_norm": 843.4868774414062, "learning_rate": 9.82692658779988e-06, "loss": 166.0405, "step": 21730 }, { "epoch": 0.17984034412871738, "grad_norm": 866.8135375976562, "learning_rate": 9.826558532707777e-06, "loss": 131.8315, "step": 21740 }, { "epoch": 0.1799230673780866, "grad_norm": 1258.517822265625, "learning_rate": 9.826190093588564e-06, "loss": 192.2693, "step": 21750 }, { "epoch": 0.18000579062745584, "grad_norm": 755.1361694335938, "learning_rate": 9.825821270471555e-06, "loss": 110.0884, "step": 21760 }, { "epoch": 0.1800885138768251, "grad_norm": 505.46856689453125, "learning_rate": 9.825452063386094e-06, "loss": 117.8567, "step": 21770 }, { "epoch": 0.18017123712619432, "grad_norm": 1369.44287109375, "learning_rate": 9.825082472361558e-06, "loss": 135.8278, "step": 21780 }, { "epoch": 0.18025396037556354, "grad_norm": 917.852783203125, "learning_rate": 9.824712497427354e-06, "loss": 101.3896, "step": 21790 }, { "epoch": 0.1803366836249328, "grad_norm": 987.9921264648438, "learning_rate": 9.824342138612918e-06, "loss": 140.2833, "step": 21800 }, { "epoch": 0.18041940687430202, "grad_norm": 902.6442260742188, "learning_rate": 9.823971395947723e-06, "loss": 147.3234, "step": 21810 }, { "epoch": 0.18050213012367125, "grad_norm": 767.7576293945312, "learning_rate": 9.823600269461259e-06, "loss": 148.7868, "step": 21820 }, { "epoch": 0.1805848533730405, "grad_norm": 766.7611694335938, "learning_rate": 9.823228759183058e-06, "loss": 124.8973, "step": 21830 }, { "epoch": 0.18066757662240973, "grad_norm": 836.1171264648438, "learning_rate": 9.822856865142683e-06, "loss": 117.0898, "step": 21840 }, { "epoch": 0.18075029987177896, "grad_norm": 1208.75146484375, "learning_rate": 9.822484587369721e-06, "loss": 125.5648, "step": 21850 }, { "epoch": 0.1808330231211482, "grad_norm": 1158.348388671875, "learning_rate": 9.822111925893792e-06, "loss": 139.8755, "step": 21860 }, { "epoch": 0.18091574637051744, "grad_norm": 1267.1917724609375, "learning_rate": 9.821738880744549e-06, "loss": 135.5524, "step": 21870 }, { "epoch": 0.18099846961988666, "grad_norm": 2137.44580078125, "learning_rate": 9.82136545195167e-06, "loss": 159.7926, "step": 21880 }, { "epoch": 0.1810811928692559, "grad_norm": 955.8012084960938, "learning_rate": 9.82099163954487e-06, "loss": 121.1186, "step": 21890 }, { "epoch": 0.18116391611862515, "grad_norm": 1042.67724609375, "learning_rate": 9.820617443553889e-06, "loss": 132.7085, "step": 21900 }, { "epoch": 0.18124663936799437, "grad_norm": 988.7931518554688, "learning_rate": 9.820242864008503e-06, "loss": 141.3183, "step": 21910 }, { "epoch": 0.1813293626173636, "grad_norm": 1212.5155029296875, "learning_rate": 9.819867900938514e-06, "loss": 139.713, "step": 21920 }, { "epoch": 0.18141208586673285, "grad_norm": 714.6464233398438, "learning_rate": 9.819492554373758e-06, "loss": 122.3466, "step": 21930 }, { "epoch": 0.18149480911610208, "grad_norm": 1552.1221923828125, "learning_rate": 9.819116824344095e-06, "loss": 127.3137, "step": 21940 }, { "epoch": 0.1815775323654713, "grad_norm": 577.6259155273438, "learning_rate": 9.818740710879424e-06, "loss": 87.6241, "step": 21950 }, { "epoch": 0.18166025561484056, "grad_norm": 947.9591064453125, "learning_rate": 9.81836421400967e-06, "loss": 109.6651, "step": 21960 }, { "epoch": 0.1817429788642098, "grad_norm": 915.073486328125, "learning_rate": 9.81798733376479e-06, "loss": 112.4667, "step": 21970 }, { "epoch": 0.181825702113579, "grad_norm": 971.461181640625, "learning_rate": 9.817610070174768e-06, "loss": 159.3875, "step": 21980 }, { "epoch": 0.18190842536294827, "grad_norm": 839.3546142578125, "learning_rate": 9.817232423269622e-06, "loss": 141.2685, "step": 21990 }, { "epoch": 0.1819911486123175, "grad_norm": 901.919189453125, "learning_rate": 9.816854393079402e-06, "loss": 150.3302, "step": 22000 }, { "epoch": 0.18207387186168672, "grad_norm": 1099.916259765625, "learning_rate": 9.816475979634183e-06, "loss": 105.6784, "step": 22010 }, { "epoch": 0.18215659511105597, "grad_norm": 1539.3607177734375, "learning_rate": 9.816097182964076e-06, "loss": 150.3366, "step": 22020 }, { "epoch": 0.1822393183604252, "grad_norm": 1262.76513671875, "learning_rate": 9.81571800309922e-06, "loss": 205.2206, "step": 22030 }, { "epoch": 0.18232204160979443, "grad_norm": 1119.6072998046875, "learning_rate": 9.815338440069782e-06, "loss": 97.6272, "step": 22040 }, { "epoch": 0.18240476485916368, "grad_norm": 977.2727661132812, "learning_rate": 9.814958493905962e-06, "loss": 154.7452, "step": 22050 }, { "epoch": 0.1824874881085329, "grad_norm": 1467.2486572265625, "learning_rate": 9.814578164637996e-06, "loss": 116.3554, "step": 22060 }, { "epoch": 0.18257021135790213, "grad_norm": 1132.6796875, "learning_rate": 9.81419745229614e-06, "loss": 146.8583, "step": 22070 }, { "epoch": 0.18265293460727136, "grad_norm": 1301.7706298828125, "learning_rate": 9.813816356910685e-06, "loss": 150.6081, "step": 22080 }, { "epoch": 0.18273565785664062, "grad_norm": 786.4771728515625, "learning_rate": 9.813434878511956e-06, "loss": 98.5915, "step": 22090 }, { "epoch": 0.18281838110600984, "grad_norm": 1082.4107666015625, "learning_rate": 9.813053017130305e-06, "loss": 106.2249, "step": 22100 }, { "epoch": 0.18290110435537907, "grad_norm": 844.4359130859375, "learning_rate": 9.812670772796113e-06, "loss": 108.4317, "step": 22110 }, { "epoch": 0.18298382760474832, "grad_norm": 1356.7515869140625, "learning_rate": 9.812288145539796e-06, "loss": 156.1458, "step": 22120 }, { "epoch": 0.18306655085411755, "grad_norm": 571.9811401367188, "learning_rate": 9.811905135391796e-06, "loss": 128.8242, "step": 22130 }, { "epoch": 0.18314927410348678, "grad_norm": 996.9983520507812, "learning_rate": 9.81152174238259e-06, "loss": 121.8907, "step": 22140 }, { "epoch": 0.18323199735285603, "grad_norm": 1005.370361328125, "learning_rate": 9.81113796654268e-06, "loss": 130.7526, "step": 22150 }, { "epoch": 0.18331472060222526, "grad_norm": 522.5587158203125, "learning_rate": 9.810753807902603e-06, "loss": 167.8494, "step": 22160 }, { "epoch": 0.18339744385159448, "grad_norm": 1476.099853515625, "learning_rate": 9.81036926649292e-06, "loss": 125.2975, "step": 22170 }, { "epoch": 0.18348016710096374, "grad_norm": 864.9098510742188, "learning_rate": 9.809984342344234e-06, "loss": 114.3755, "step": 22180 }, { "epoch": 0.18356289035033296, "grad_norm": 1131.15966796875, "learning_rate": 9.80959903548717e-06, "loss": 130.3137, "step": 22190 }, { "epoch": 0.1836456135997022, "grad_norm": 2763.490234375, "learning_rate": 9.80921334595238e-06, "loss": 159.7871, "step": 22200 }, { "epoch": 0.18372833684907144, "grad_norm": 815.5933227539062, "learning_rate": 9.808827273770558e-06, "loss": 103.766, "step": 22210 }, { "epoch": 0.18381106009844067, "grad_norm": 1680.257568359375, "learning_rate": 9.80844081897242e-06, "loss": 162.1204, "step": 22220 }, { "epoch": 0.1838937833478099, "grad_norm": 934.7627563476562, "learning_rate": 9.808053981588712e-06, "loss": 144.453, "step": 22230 }, { "epoch": 0.18397650659717912, "grad_norm": 1144.36767578125, "learning_rate": 9.807666761650215e-06, "loss": 121.5437, "step": 22240 }, { "epoch": 0.18405922984654838, "grad_norm": 2383.975341796875, "learning_rate": 9.80727915918774e-06, "loss": 178.4958, "step": 22250 }, { "epoch": 0.1841419530959176, "grad_norm": 1073.094482421875, "learning_rate": 9.806891174232122e-06, "loss": 149.8745, "step": 22260 }, { "epoch": 0.18422467634528683, "grad_norm": 1233.2926025390625, "learning_rate": 9.806502806814236e-06, "loss": 128.9702, "step": 22270 }, { "epoch": 0.18430739959465609, "grad_norm": 837.9373168945312, "learning_rate": 9.806114056964977e-06, "loss": 139.9306, "step": 22280 }, { "epoch": 0.1843901228440253, "grad_norm": 628.1285400390625, "learning_rate": 9.805724924715283e-06, "loss": 123.0449, "step": 22290 }, { "epoch": 0.18447284609339454, "grad_norm": 561.8240356445312, "learning_rate": 9.80533541009611e-06, "loss": 105.3535, "step": 22300 }, { "epoch": 0.1845555693427638, "grad_norm": 1421.851806640625, "learning_rate": 9.804945513138454e-06, "loss": 219.4902, "step": 22310 }, { "epoch": 0.18463829259213302, "grad_norm": 825.07080078125, "learning_rate": 9.804555233873335e-06, "loss": 135.7106, "step": 22320 }, { "epoch": 0.18472101584150225, "grad_norm": 2163.25439453125, "learning_rate": 9.804164572331804e-06, "loss": 173.7582, "step": 22330 }, { "epoch": 0.1848037390908715, "grad_norm": 1122.6065673828125, "learning_rate": 9.80377352854495e-06, "loss": 91.7703, "step": 22340 }, { "epoch": 0.18488646234024073, "grad_norm": 909.8526000976562, "learning_rate": 9.80338210254388e-06, "loss": 124.849, "step": 22350 }, { "epoch": 0.18496918558960995, "grad_norm": 784.2378540039062, "learning_rate": 9.80299029435974e-06, "loss": 148.4874, "step": 22360 }, { "epoch": 0.1850519088389792, "grad_norm": 999.297119140625, "learning_rate": 9.802598104023706e-06, "loss": 159.1145, "step": 22370 }, { "epoch": 0.18513463208834843, "grad_norm": 753.5262451171875, "learning_rate": 9.80220553156698e-06, "loss": 113.2028, "step": 22380 }, { "epoch": 0.18521735533771766, "grad_norm": 1254.95947265625, "learning_rate": 9.801812577020802e-06, "loss": 139.2906, "step": 22390 }, { "epoch": 0.1853000785870869, "grad_norm": 1023.6962890625, "learning_rate": 9.801419240416432e-06, "loss": 144.157, "step": 22400 }, { "epoch": 0.18538280183645614, "grad_norm": 937.4168701171875, "learning_rate": 9.80102552178517e-06, "loss": 124.0214, "step": 22410 }, { "epoch": 0.18546552508582537, "grad_norm": 1038.947265625, "learning_rate": 9.800631421158341e-06, "loss": 134.8847, "step": 22420 }, { "epoch": 0.1855482483351946, "grad_norm": 839.5892333984375, "learning_rate": 9.800236938567302e-06, "loss": 114.9243, "step": 22430 }, { "epoch": 0.18563097158456385, "grad_norm": 1064.1595458984375, "learning_rate": 9.799842074043438e-06, "loss": 132.5262, "step": 22440 }, { "epoch": 0.18571369483393307, "grad_norm": 868.5515747070312, "learning_rate": 9.799446827618172e-06, "loss": 123.4177, "step": 22450 }, { "epoch": 0.1857964180833023, "grad_norm": 1418.9991455078125, "learning_rate": 9.799051199322944e-06, "loss": 134.293, "step": 22460 }, { "epoch": 0.18587914133267155, "grad_norm": 1264.387939453125, "learning_rate": 9.798655189189239e-06, "loss": 155.6345, "step": 22470 }, { "epoch": 0.18596186458204078, "grad_norm": 1341.15185546875, "learning_rate": 9.798258797248563e-06, "loss": 151.904, "step": 22480 }, { "epoch": 0.18604458783141, "grad_norm": 1191.142578125, "learning_rate": 9.797862023532457e-06, "loss": 143.8828, "step": 22490 }, { "epoch": 0.18612731108077926, "grad_norm": 1029.2672119140625, "learning_rate": 9.797464868072489e-06, "loss": 111.6996, "step": 22500 }, { "epoch": 0.1862100343301485, "grad_norm": 898.6533203125, "learning_rate": 9.797067330900256e-06, "loss": 123.9605, "step": 22510 }, { "epoch": 0.18629275757951771, "grad_norm": 1148.5517578125, "learning_rate": 9.796669412047392e-06, "loss": 107.9766, "step": 22520 }, { "epoch": 0.18637548082888697, "grad_norm": 1347.3740234375, "learning_rate": 9.796271111545559e-06, "loss": 132.5673, "step": 22530 }, { "epoch": 0.1864582040782562, "grad_norm": 732.031982421875, "learning_rate": 9.795872429426443e-06, "loss": 113.1994, "step": 22540 }, { "epoch": 0.18654092732762542, "grad_norm": 847.0791625976562, "learning_rate": 9.79547336572177e-06, "loss": 115.746, "step": 22550 }, { "epoch": 0.18662365057699468, "grad_norm": 897.2965698242188, "learning_rate": 9.795073920463289e-06, "loss": 123.8876, "step": 22560 }, { "epoch": 0.1867063738263639, "grad_norm": 1148.21875, "learning_rate": 9.794674093682781e-06, "loss": 133.7378, "step": 22570 }, { "epoch": 0.18678909707573313, "grad_norm": 1180.834716796875, "learning_rate": 9.79427388541206e-06, "loss": 100.9495, "step": 22580 }, { "epoch": 0.18687182032510238, "grad_norm": 681.8207397460938, "learning_rate": 9.79387329568297e-06, "loss": 108.7203, "step": 22590 }, { "epoch": 0.1869545435744716, "grad_norm": 859.6419067382812, "learning_rate": 9.793472324527383e-06, "loss": 179.532, "step": 22600 }, { "epoch": 0.18703726682384084, "grad_norm": 615.40380859375, "learning_rate": 9.793070971977203e-06, "loss": 103.2244, "step": 22610 }, { "epoch": 0.18711999007321006, "grad_norm": 896.5668334960938, "learning_rate": 9.79266923806436e-06, "loss": 135.8797, "step": 22620 }, { "epoch": 0.18720271332257932, "grad_norm": 685.52685546875, "learning_rate": 9.792267122820823e-06, "loss": 102.7591, "step": 22630 }, { "epoch": 0.18728543657194854, "grad_norm": 656.8187255859375, "learning_rate": 9.791864626278584e-06, "loss": 134.8737, "step": 22640 }, { "epoch": 0.18736815982131777, "grad_norm": 406.96820068359375, "learning_rate": 9.791461748469669e-06, "loss": 129.8552, "step": 22650 }, { "epoch": 0.18745088307068702, "grad_norm": 1129.7803955078125, "learning_rate": 9.791058489426134e-06, "loss": 133.4618, "step": 22660 }, { "epoch": 0.18753360632005625, "grad_norm": 1034.3385009765625, "learning_rate": 9.790654849180059e-06, "loss": 107.0448, "step": 22670 }, { "epoch": 0.18761632956942548, "grad_norm": 1405.0714111328125, "learning_rate": 9.790250827763566e-06, "loss": 121.3317, "step": 22680 }, { "epoch": 0.18769905281879473, "grad_norm": 1143.3116455078125, "learning_rate": 9.7898464252088e-06, "loss": 93.2796, "step": 22690 }, { "epoch": 0.18778177606816396, "grad_norm": 1161.796630859375, "learning_rate": 9.789441641547935e-06, "loss": 112.5664, "step": 22700 }, { "epoch": 0.18786449931753318, "grad_norm": 1665.2811279296875, "learning_rate": 9.789036476813178e-06, "loss": 162.8835, "step": 22710 }, { "epoch": 0.18794722256690244, "grad_norm": 885.5316772460938, "learning_rate": 9.788630931036769e-06, "loss": 127.6729, "step": 22720 }, { "epoch": 0.18802994581627167, "grad_norm": 1863.15673828125, "learning_rate": 9.788225004250974e-06, "loss": 136.1164, "step": 22730 }, { "epoch": 0.1881126690656409, "grad_norm": 770.5360717773438, "learning_rate": 9.78781869648809e-06, "loss": 145.4966, "step": 22740 }, { "epoch": 0.18819539231501015, "grad_norm": 1846.6016845703125, "learning_rate": 9.787412007780445e-06, "loss": 119.8534, "step": 22750 }, { "epoch": 0.18827811556437937, "grad_norm": 930.654052734375, "learning_rate": 9.787004938160398e-06, "loss": 112.2845, "step": 22760 }, { "epoch": 0.1883608388137486, "grad_norm": 3968.6298828125, "learning_rate": 9.786597487660336e-06, "loss": 166.1542, "step": 22770 }, { "epoch": 0.18844356206311783, "grad_norm": 687.5050659179688, "learning_rate": 9.78618965631268e-06, "loss": 118.5364, "step": 22780 }, { "epoch": 0.18852628531248708, "grad_norm": 1421.8592529296875, "learning_rate": 9.785781444149883e-06, "loss": 102.7248, "step": 22790 }, { "epoch": 0.1886090085618563, "grad_norm": 1723.80029296875, "learning_rate": 9.785372851204415e-06, "loss": 136.1481, "step": 22800 }, { "epoch": 0.18869173181122553, "grad_norm": 1364.1827392578125, "learning_rate": 9.784963877508794e-06, "loss": 135.9108, "step": 22810 }, { "epoch": 0.1887744550605948, "grad_norm": 2061.134765625, "learning_rate": 9.784554523095554e-06, "loss": 201.537, "step": 22820 }, { "epoch": 0.188857178309964, "grad_norm": 958.0075073242188, "learning_rate": 9.784144787997272e-06, "loss": 96.8345, "step": 22830 }, { "epoch": 0.18893990155933324, "grad_norm": 717.226318359375, "learning_rate": 9.783734672246545e-06, "loss": 130.6322, "step": 22840 }, { "epoch": 0.1890226248087025, "grad_norm": 716.5567016601562, "learning_rate": 9.783324175876004e-06, "loss": 105.1676, "step": 22850 }, { "epoch": 0.18910534805807172, "grad_norm": 1025.28173828125, "learning_rate": 9.782913298918311e-06, "loss": 151.713, "step": 22860 }, { "epoch": 0.18918807130744095, "grad_norm": 649.5341186523438, "learning_rate": 9.782502041406157e-06, "loss": 135.4802, "step": 22870 }, { "epoch": 0.1892707945568102, "grad_norm": 1117.214599609375, "learning_rate": 9.782090403372263e-06, "loss": 148.4325, "step": 22880 }, { "epoch": 0.18935351780617943, "grad_norm": 1014.591552734375, "learning_rate": 9.781678384849385e-06, "loss": 111.5626, "step": 22890 }, { "epoch": 0.18943624105554865, "grad_norm": 2505.06982421875, "learning_rate": 9.7812659858703e-06, "loss": 142.1111, "step": 22900 }, { "epoch": 0.1895189643049179, "grad_norm": 1313.8260498046875, "learning_rate": 9.780853206467826e-06, "loss": 133.5671, "step": 22910 }, { "epoch": 0.18960168755428713, "grad_norm": 984.4515380859375, "learning_rate": 9.780440046674803e-06, "loss": 167.4173, "step": 22920 }, { "epoch": 0.18968441080365636, "grad_norm": 893.4295043945312, "learning_rate": 9.780026506524106e-06, "loss": 159.9201, "step": 22930 }, { "epoch": 0.18976713405302562, "grad_norm": 818.2760620117188, "learning_rate": 9.779612586048635e-06, "loss": 132.297, "step": 22940 }, { "epoch": 0.18984985730239484, "grad_norm": 1028.792236328125, "learning_rate": 9.779198285281326e-06, "loss": 175.7389, "step": 22950 }, { "epoch": 0.18993258055176407, "grad_norm": 1137.0174560546875, "learning_rate": 9.778783604255145e-06, "loss": 120.6092, "step": 22960 }, { "epoch": 0.1900153038011333, "grad_norm": 2498.3369140625, "learning_rate": 9.778368543003083e-06, "loss": 144.8033, "step": 22970 }, { "epoch": 0.19009802705050255, "grad_norm": 1361.688232421875, "learning_rate": 9.777953101558164e-06, "loss": 133.7076, "step": 22980 }, { "epoch": 0.19018075029987178, "grad_norm": 1047.64453125, "learning_rate": 9.777537279953448e-06, "loss": 120.5423, "step": 22990 }, { "epoch": 0.190263473549241, "grad_norm": 982.462158203125, "learning_rate": 9.777121078222015e-06, "loss": 108.502, "step": 23000 }, { "epoch": 0.19034619679861026, "grad_norm": 1133.6990966796875, "learning_rate": 9.77670449639698e-06, "loss": 129.8804, "step": 23010 }, { "epoch": 0.19042892004797948, "grad_norm": 671.9098510742188, "learning_rate": 9.776287534511492e-06, "loss": 130.2723, "step": 23020 }, { "epoch": 0.1905116432973487, "grad_norm": 1220.64892578125, "learning_rate": 9.775870192598726e-06, "loss": 122.8446, "step": 23030 }, { "epoch": 0.19059436654671796, "grad_norm": 2241.37548828125, "learning_rate": 9.775452470691886e-06, "loss": 133.7913, "step": 23040 }, { "epoch": 0.1906770897960872, "grad_norm": 1148.03662109375, "learning_rate": 9.77503436882421e-06, "loss": 138.1318, "step": 23050 }, { "epoch": 0.19075981304545642, "grad_norm": 980.452880859375, "learning_rate": 9.774615887028964e-06, "loss": 147.464, "step": 23060 }, { "epoch": 0.19084253629482567, "grad_norm": 2212.347412109375, "learning_rate": 9.774197025339442e-06, "loss": 102.5226, "step": 23070 }, { "epoch": 0.1909252595441949, "grad_norm": 928.7798461914062, "learning_rate": 9.773777783788976e-06, "loss": 113.3667, "step": 23080 }, { "epoch": 0.19100798279356412, "grad_norm": 1282.0994873046875, "learning_rate": 9.77335816241092e-06, "loss": 144.8801, "step": 23090 }, { "epoch": 0.19109070604293338, "grad_norm": 837.4090576171875, "learning_rate": 9.77293816123866e-06, "loss": 104.0558, "step": 23100 }, { "epoch": 0.1911734292923026, "grad_norm": 881.4822998046875, "learning_rate": 9.772517780305618e-06, "loss": 121.4086, "step": 23110 }, { "epoch": 0.19125615254167183, "grad_norm": 880.7931518554688, "learning_rate": 9.772097019645236e-06, "loss": 139.5056, "step": 23120 }, { "epoch": 0.19133887579104109, "grad_norm": 776.1262817382812, "learning_rate": 9.771675879290998e-06, "loss": 146.6072, "step": 23130 }, { "epoch": 0.1914215990404103, "grad_norm": 904.765380859375, "learning_rate": 9.771254359276407e-06, "loss": 116.9324, "step": 23140 }, { "epoch": 0.19150432228977954, "grad_norm": 730.8990478515625, "learning_rate": 9.770832459635004e-06, "loss": 133.2764, "step": 23150 }, { "epoch": 0.19158704553914876, "grad_norm": 690.1975708007812, "learning_rate": 9.77041018040036e-06, "loss": 175.1648, "step": 23160 }, { "epoch": 0.19166976878851802, "grad_norm": 907.7679443359375, "learning_rate": 9.769987521606068e-06, "loss": 105.3016, "step": 23170 }, { "epoch": 0.19175249203788725, "grad_norm": 693.336181640625, "learning_rate": 9.769564483285761e-06, "loss": 192.6149, "step": 23180 }, { "epoch": 0.19183521528725647, "grad_norm": 557.3772583007812, "learning_rate": 9.769141065473099e-06, "loss": 115.1971, "step": 23190 }, { "epoch": 0.19191793853662573, "grad_norm": 858.6890869140625, "learning_rate": 9.768717268201768e-06, "loss": 145.7314, "step": 23200 }, { "epoch": 0.19200066178599495, "grad_norm": 2752.160888671875, "learning_rate": 9.768293091505491e-06, "loss": 128.7382, "step": 23210 }, { "epoch": 0.19208338503536418, "grad_norm": 971.1498413085938, "learning_rate": 9.767868535418014e-06, "loss": 126.5057, "step": 23220 }, { "epoch": 0.19216610828473343, "grad_norm": 793.6380615234375, "learning_rate": 9.767443599973122e-06, "loss": 108.5758, "step": 23230 }, { "epoch": 0.19224883153410266, "grad_norm": 1140.6827392578125, "learning_rate": 9.76701828520462e-06, "loss": 117.5244, "step": 23240 }, { "epoch": 0.1923315547834719, "grad_norm": 840.5177001953125, "learning_rate": 9.766592591146353e-06, "loss": 107.5938, "step": 23250 }, { "epoch": 0.19241427803284114, "grad_norm": 1344.9217529296875, "learning_rate": 9.766166517832188e-06, "loss": 120.1126, "step": 23260 }, { "epoch": 0.19249700128221037, "grad_norm": 1015.5147705078125, "learning_rate": 9.765740065296025e-06, "loss": 131.5013, "step": 23270 }, { "epoch": 0.1925797245315796, "grad_norm": 574.64306640625, "learning_rate": 9.765313233571798e-06, "loss": 131.5927, "step": 23280 }, { "epoch": 0.19266244778094885, "grad_norm": 1062.6119384765625, "learning_rate": 9.76488602269347e-06, "loss": 162.6192, "step": 23290 }, { "epoch": 0.19274517103031807, "grad_norm": 1031.8822021484375, "learning_rate": 9.764458432695026e-06, "loss": 91.6339, "step": 23300 }, { "epoch": 0.1928278942796873, "grad_norm": 715.5499877929688, "learning_rate": 9.76403046361049e-06, "loss": 98.5823, "step": 23310 }, { "epoch": 0.19291061752905656, "grad_norm": 707.9616088867188, "learning_rate": 9.763602115473914e-06, "loss": 129.4643, "step": 23320 }, { "epoch": 0.19299334077842578, "grad_norm": 1023.2615966796875, "learning_rate": 9.763173388319381e-06, "loss": 108.6447, "step": 23330 }, { "epoch": 0.193076064027795, "grad_norm": 745.7240600585938, "learning_rate": 9.762744282181e-06, "loss": 160.1926, "step": 23340 }, { "epoch": 0.19315878727716423, "grad_norm": 1169.117431640625, "learning_rate": 9.762314797092916e-06, "loss": 136.8101, "step": 23350 }, { "epoch": 0.1932415105265335, "grad_norm": 2356.3876953125, "learning_rate": 9.761884933089301e-06, "loss": 149.6885, "step": 23360 }, { "epoch": 0.19332423377590272, "grad_norm": 748.1530151367188, "learning_rate": 9.761454690204352e-06, "loss": 90.2527, "step": 23370 }, { "epoch": 0.19340695702527194, "grad_norm": 773.795654296875, "learning_rate": 9.76102406847231e-06, "loss": 133.7801, "step": 23380 }, { "epoch": 0.1934896802746412, "grad_norm": 785.1251831054688, "learning_rate": 9.760593067927428e-06, "loss": 92.1381, "step": 23390 }, { "epoch": 0.19357240352401042, "grad_norm": 987.9070434570312, "learning_rate": 9.760161688604008e-06, "loss": 152.8994, "step": 23400 }, { "epoch": 0.19365512677337965, "grad_norm": 1091.9166259765625, "learning_rate": 9.759729930536367e-06, "loss": 106.4156, "step": 23410 }, { "epoch": 0.1937378500227489, "grad_norm": 828.226806640625, "learning_rate": 9.75929779375886e-06, "loss": 127.5234, "step": 23420 }, { "epoch": 0.19382057327211813, "grad_norm": 1164.93359375, "learning_rate": 9.75886527830587e-06, "loss": 147.2189, "step": 23430 }, { "epoch": 0.19390329652148736, "grad_norm": 1556.861572265625, "learning_rate": 9.75843238421181e-06, "loss": 132.9836, "step": 23440 }, { "epoch": 0.1939860197708566, "grad_norm": 1154.7764892578125, "learning_rate": 9.757999111511121e-06, "loss": 131.7635, "step": 23450 }, { "epoch": 0.19406874302022584, "grad_norm": 1170.080078125, "learning_rate": 9.757565460238281e-06, "loss": 120.619, "step": 23460 }, { "epoch": 0.19415146626959506, "grad_norm": 1463.3924560546875, "learning_rate": 9.757131430427791e-06, "loss": 129.6818, "step": 23470 }, { "epoch": 0.19423418951896432, "grad_norm": 1100.9061279296875, "learning_rate": 9.756697022114185e-06, "loss": 108.3849, "step": 23480 }, { "epoch": 0.19431691276833354, "grad_norm": 1876.0504150390625, "learning_rate": 9.756262235332029e-06, "loss": 103.7323, "step": 23490 }, { "epoch": 0.19439963601770277, "grad_norm": 974.3872680664062, "learning_rate": 9.755827070115915e-06, "loss": 105.9175, "step": 23500 }, { "epoch": 0.194482359267072, "grad_norm": 888.02099609375, "learning_rate": 9.755391526500466e-06, "loss": 108.9482, "step": 23510 }, { "epoch": 0.19456508251644125, "grad_norm": 863.0517578125, "learning_rate": 9.75495560452034e-06, "loss": 131.4029, "step": 23520 }, { "epoch": 0.19464780576581048, "grad_norm": 1131.92431640625, "learning_rate": 9.754519304210214e-06, "loss": 122.4951, "step": 23530 }, { "epoch": 0.1947305290151797, "grad_norm": 2271.8134765625, "learning_rate": 9.754082625604812e-06, "loss": 121.6546, "step": 23540 }, { "epoch": 0.19481325226454896, "grad_norm": 919.2047119140625, "learning_rate": 9.753645568738872e-06, "loss": 138.8903, "step": 23550 }, { "epoch": 0.19489597551391818, "grad_norm": 840.5191650390625, "learning_rate": 9.75320813364717e-06, "loss": 91.0436, "step": 23560 }, { "epoch": 0.1949786987632874, "grad_norm": 621.6953125, "learning_rate": 9.752770320364512e-06, "loss": 128.6445, "step": 23570 }, { "epoch": 0.19506142201265667, "grad_norm": 754.2584838867188, "learning_rate": 9.752332128925732e-06, "loss": 111.4495, "step": 23580 }, { "epoch": 0.1951441452620259, "grad_norm": 1024.14501953125, "learning_rate": 9.751893559365693e-06, "loss": 170.7815, "step": 23590 }, { "epoch": 0.19522686851139512, "grad_norm": 1205.4876708984375, "learning_rate": 9.751454611719294e-06, "loss": 121.6351, "step": 23600 }, { "epoch": 0.19530959176076437, "grad_norm": 1503.17236328125, "learning_rate": 9.751015286021455e-06, "loss": 164.3172, "step": 23610 }, { "epoch": 0.1953923150101336, "grad_norm": 937.5809936523438, "learning_rate": 9.750575582307136e-06, "loss": 160.1191, "step": 23620 }, { "epoch": 0.19547503825950283, "grad_norm": 983.7006225585938, "learning_rate": 9.75013550061132e-06, "loss": 120.9869, "step": 23630 }, { "epoch": 0.19555776150887208, "grad_norm": 1772.2957763671875, "learning_rate": 9.749695040969022e-06, "loss": 114.3774, "step": 23640 }, { "epoch": 0.1956404847582413, "grad_norm": 924.7174072265625, "learning_rate": 9.749254203415288e-06, "loss": 124.1198, "step": 23650 }, { "epoch": 0.19572320800761053, "grad_norm": 972.820068359375, "learning_rate": 9.748812987985193e-06, "loss": 123.3391, "step": 23660 }, { "epoch": 0.1958059312569798, "grad_norm": 1330.291748046875, "learning_rate": 9.748371394713842e-06, "loss": 126.0948, "step": 23670 }, { "epoch": 0.195888654506349, "grad_norm": 1056.2666015625, "learning_rate": 9.747929423636372e-06, "loss": 128.4867, "step": 23680 }, { "epoch": 0.19597137775571824, "grad_norm": 670.9091186523438, "learning_rate": 9.74748707478795e-06, "loss": 140.6503, "step": 23690 }, { "epoch": 0.19605410100508747, "grad_norm": 1226.66162109375, "learning_rate": 9.747044348203766e-06, "loss": 130.9208, "step": 23700 }, { "epoch": 0.19613682425445672, "grad_norm": 907.8272705078125, "learning_rate": 9.74660124391905e-06, "loss": 127.5544, "step": 23710 }, { "epoch": 0.19621954750382595, "grad_norm": 898.1268920898438, "learning_rate": 9.746157761969058e-06, "loss": 164.2529, "step": 23720 }, { "epoch": 0.19630227075319517, "grad_norm": 1068.112060546875, "learning_rate": 9.745713902389074e-06, "loss": 143.5591, "step": 23730 }, { "epoch": 0.19638499400256443, "grad_norm": 1102.2633056640625, "learning_rate": 9.745269665214415e-06, "loss": 122.1263, "step": 23740 }, { "epoch": 0.19646771725193365, "grad_norm": 556.4323120117188, "learning_rate": 9.744825050480425e-06, "loss": 121.492, "step": 23750 }, { "epoch": 0.19655044050130288, "grad_norm": 1045.6256103515625, "learning_rate": 9.744380058222483e-06, "loss": 131.9083, "step": 23760 }, { "epoch": 0.19663316375067214, "grad_norm": 1136.8719482421875, "learning_rate": 9.743934688475994e-06, "loss": 115.8493, "step": 23770 }, { "epoch": 0.19671588700004136, "grad_norm": 962.9896850585938, "learning_rate": 9.743488941276394e-06, "loss": 117.902, "step": 23780 }, { "epoch": 0.1967986102494106, "grad_norm": 1086.8525390625, "learning_rate": 9.743042816659147e-06, "loss": 134.2704, "step": 23790 }, { "epoch": 0.19688133349877984, "grad_norm": 1227.1202392578125, "learning_rate": 9.742596314659751e-06, "loss": 127.5619, "step": 23800 }, { "epoch": 0.19696405674814907, "grad_norm": 1608.360595703125, "learning_rate": 9.742149435313732e-06, "loss": 120.5991, "step": 23810 }, { "epoch": 0.1970467799975183, "grad_norm": 664.271728515625, "learning_rate": 9.741702178656647e-06, "loss": 151.2022, "step": 23820 }, { "epoch": 0.19712950324688755, "grad_norm": 1345.0858154296875, "learning_rate": 9.74125454472408e-06, "loss": 136.8234, "step": 23830 }, { "epoch": 0.19721222649625678, "grad_norm": 732.8429565429688, "learning_rate": 9.740806533551647e-06, "loss": 234.7962, "step": 23840 }, { "epoch": 0.197294949745626, "grad_norm": 1542.8756103515625, "learning_rate": 9.740358145174999e-06, "loss": 130.3346, "step": 23850 }, { "epoch": 0.19737767299499526, "grad_norm": 821.8853149414062, "learning_rate": 9.739909379629805e-06, "loss": 139.0029, "step": 23860 }, { "epoch": 0.19746039624436448, "grad_norm": 812.029541015625, "learning_rate": 9.739460236951778e-06, "loss": 127.8701, "step": 23870 }, { "epoch": 0.1975431194937337, "grad_norm": 969.9833984375, "learning_rate": 9.739010717176649e-06, "loss": 99.5856, "step": 23880 }, { "epoch": 0.19762584274310294, "grad_norm": 1117.0540771484375, "learning_rate": 9.738560820340189e-06, "loss": 114.7225, "step": 23890 }, { "epoch": 0.1977085659924722, "grad_norm": 601.694091796875, "learning_rate": 9.738110546478188e-06, "loss": 123.2165, "step": 23900 }, { "epoch": 0.19779128924184142, "grad_norm": 1268.921630859375, "learning_rate": 9.737659895626478e-06, "loss": 130.8849, "step": 23910 }, { "epoch": 0.19787401249121064, "grad_norm": 1429.4453125, "learning_rate": 9.737208867820914e-06, "loss": 123.7755, "step": 23920 }, { "epoch": 0.1979567357405799, "grad_norm": 654.82080078125, "learning_rate": 9.736757463097378e-06, "loss": 152.5086, "step": 23930 }, { "epoch": 0.19803945898994912, "grad_norm": 852.7525024414062, "learning_rate": 9.736305681491792e-06, "loss": 116.6475, "step": 23940 }, { "epoch": 0.19812218223931835, "grad_norm": 1039.8662109375, "learning_rate": 9.735853523040098e-06, "loss": 138.9281, "step": 23950 }, { "epoch": 0.1982049054886876, "grad_norm": 1178.4395751953125, "learning_rate": 9.735400987778274e-06, "loss": 143.172, "step": 23960 }, { "epoch": 0.19828762873805683, "grad_norm": 1232.4547119140625, "learning_rate": 9.734948075742328e-06, "loss": 121.5333, "step": 23970 }, { "epoch": 0.19837035198742606, "grad_norm": 865.7545166015625, "learning_rate": 9.734494786968293e-06, "loss": 154.3453, "step": 23980 }, { "epoch": 0.1984530752367953, "grad_norm": 669.6997680664062, "learning_rate": 9.734041121492235e-06, "loss": 144.5734, "step": 23990 }, { "epoch": 0.19853579848616454, "grad_norm": 1144.2825927734375, "learning_rate": 9.733587079350254e-06, "loss": 107.7752, "step": 24000 }, { "epoch": 0.19861852173553377, "grad_norm": 2160.167236328125, "learning_rate": 9.73313266057847e-06, "loss": 129.0158, "step": 24010 }, { "epoch": 0.19870124498490302, "grad_norm": 1064.593994140625, "learning_rate": 9.732677865213044e-06, "loss": 141.8949, "step": 24020 }, { "epoch": 0.19878396823427225, "grad_norm": 1279.8077392578125, "learning_rate": 9.73222269329016e-06, "loss": 153.0952, "step": 24030 }, { "epoch": 0.19886669148364147, "grad_norm": 950.4849853515625, "learning_rate": 9.731767144846034e-06, "loss": 146.848, "step": 24040 }, { "epoch": 0.1989494147330107, "grad_norm": 1283.0186767578125, "learning_rate": 9.731311219916912e-06, "loss": 150.6841, "step": 24050 }, { "epoch": 0.19903213798237995, "grad_norm": 704.3133544921875, "learning_rate": 9.730854918539072e-06, "loss": 149.4796, "step": 24060 }, { "epoch": 0.19911486123174918, "grad_norm": 846.5486450195312, "learning_rate": 9.730398240748816e-06, "loss": 138.2696, "step": 24070 }, { "epoch": 0.1991975844811184, "grad_norm": 779.280029296875, "learning_rate": 9.729941186582482e-06, "loss": 114.9246, "step": 24080 }, { "epoch": 0.19928030773048766, "grad_norm": 1280.6629638671875, "learning_rate": 9.729483756076436e-06, "loss": 96.8995, "step": 24090 }, { "epoch": 0.1993630309798569, "grad_norm": 626.4833374023438, "learning_rate": 9.729025949267072e-06, "loss": 117.7161, "step": 24100 }, { "epoch": 0.1994457542292261, "grad_norm": 866.7782592773438, "learning_rate": 9.728567766190817e-06, "loss": 110.1413, "step": 24110 }, { "epoch": 0.19952847747859537, "grad_norm": 1430.4652099609375, "learning_rate": 9.728109206884125e-06, "loss": 122.4254, "step": 24120 }, { "epoch": 0.1996112007279646, "grad_norm": 1178.0247802734375, "learning_rate": 9.727650271383485e-06, "loss": 131.602, "step": 24130 }, { "epoch": 0.19969392397733382, "grad_norm": 944.5925903320312, "learning_rate": 9.727190959725407e-06, "loss": 91.2385, "step": 24140 }, { "epoch": 0.19977664722670307, "grad_norm": 1111.4093017578125, "learning_rate": 9.72673127194644e-06, "loss": 185.1948, "step": 24150 }, { "epoch": 0.1998593704760723, "grad_norm": 1125.5455322265625, "learning_rate": 9.72627120808316e-06, "loss": 157.4787, "step": 24160 }, { "epoch": 0.19994209372544153, "grad_norm": 1090.2935791015625, "learning_rate": 9.725810768172169e-06, "loss": 106.9974, "step": 24170 }, { "epoch": 0.20002481697481078, "grad_norm": 620.5934448242188, "learning_rate": 9.725349952250105e-06, "loss": 115.3673, "step": 24180 }, { "epoch": 0.20010754022418, "grad_norm": 1263.1217041015625, "learning_rate": 9.724888760353631e-06, "loss": 148.2358, "step": 24190 }, { "epoch": 0.20019026347354923, "grad_norm": 1106.949951171875, "learning_rate": 9.72442719251944e-06, "loss": 170.5474, "step": 24200 }, { "epoch": 0.2002729867229185, "grad_norm": 1334.36962890625, "learning_rate": 9.723965248784264e-06, "loss": 149.0543, "step": 24210 }, { "epoch": 0.20035570997228772, "grad_norm": 1211.2431640625, "learning_rate": 9.723502929184851e-06, "loss": 126.5367, "step": 24220 }, { "epoch": 0.20043843322165694, "grad_norm": 863.3626708984375, "learning_rate": 9.723040233757987e-06, "loss": 142.1387, "step": 24230 }, { "epoch": 0.20052115647102617, "grad_norm": 1154.0657958984375, "learning_rate": 9.722577162540489e-06, "loss": 123.5952, "step": 24240 }, { "epoch": 0.20060387972039542, "grad_norm": 1012.69140625, "learning_rate": 9.7221137155692e-06, "loss": 110.8877, "step": 24250 }, { "epoch": 0.20068660296976465, "grad_norm": 1386.55859375, "learning_rate": 9.721649892880995e-06, "loss": 170.2592, "step": 24260 }, { "epoch": 0.20076932621913388, "grad_norm": 1342.9095458984375, "learning_rate": 9.721185694512776e-06, "loss": 134.4591, "step": 24270 }, { "epoch": 0.20085204946850313, "grad_norm": 392.2961120605469, "learning_rate": 9.720721120501478e-06, "loss": 134.1603, "step": 24280 }, { "epoch": 0.20093477271787236, "grad_norm": 826.9261474609375, "learning_rate": 9.720256170884066e-06, "loss": 101.2419, "step": 24290 }, { "epoch": 0.20101749596724158, "grad_norm": 791.0903930664062, "learning_rate": 9.719790845697534e-06, "loss": 129.4301, "step": 24300 }, { "epoch": 0.20110021921661084, "grad_norm": 1188.9549560546875, "learning_rate": 9.719325144978907e-06, "loss": 145.1762, "step": 24310 }, { "epoch": 0.20118294246598006, "grad_norm": 1141.0123291015625, "learning_rate": 9.718859068765234e-06, "loss": 107.9242, "step": 24320 }, { "epoch": 0.2012656657153493, "grad_norm": 1065.128173828125, "learning_rate": 9.718392617093602e-06, "loss": 113.2393, "step": 24330 }, { "epoch": 0.20134838896471854, "grad_norm": 1253.3646240234375, "learning_rate": 9.717925790001125e-06, "loss": 77.6727, "step": 24340 }, { "epoch": 0.20143111221408777, "grad_norm": 1093.85498046875, "learning_rate": 9.717458587524946e-06, "loss": 112.4173, "step": 24350 }, { "epoch": 0.201513835463457, "grad_norm": 787.4635009765625, "learning_rate": 9.716991009702236e-06, "loss": 114.5591, "step": 24360 }, { "epoch": 0.20159655871282625, "grad_norm": 926.3908081054688, "learning_rate": 9.7165230565702e-06, "loss": 123.4194, "step": 24370 }, { "epoch": 0.20167928196219548, "grad_norm": 792.05859375, "learning_rate": 9.71605472816607e-06, "loss": 128.6131, "step": 24380 }, { "epoch": 0.2017620052115647, "grad_norm": 835.9586791992188, "learning_rate": 9.71558602452711e-06, "loss": 125.1441, "step": 24390 }, { "epoch": 0.20184472846093396, "grad_norm": 1247.4454345703125, "learning_rate": 9.71511694569061e-06, "loss": 166.359, "step": 24400 }, { "epoch": 0.20192745171030319, "grad_norm": 828.7860107421875, "learning_rate": 9.714647491693897e-06, "loss": 140.4719, "step": 24410 }, { "epoch": 0.2020101749596724, "grad_norm": 843.8282470703125, "learning_rate": 9.714177662574316e-06, "loss": 101.7997, "step": 24420 }, { "epoch": 0.20209289820904164, "grad_norm": 967.5045776367188, "learning_rate": 9.713707458369258e-06, "loss": 124.0601, "step": 24430 }, { "epoch": 0.2021756214584109, "grad_norm": 692.8041381835938, "learning_rate": 9.713236879116127e-06, "loss": 120.8918, "step": 24440 }, { "epoch": 0.20225834470778012, "grad_norm": 1022.600341796875, "learning_rate": 9.71276592485237e-06, "loss": 128.2274, "step": 24450 }, { "epoch": 0.20234106795714935, "grad_norm": 1309.6890869140625, "learning_rate": 9.712294595615458e-06, "loss": 134.2852, "step": 24460 }, { "epoch": 0.2024237912065186, "grad_norm": 596.5182495117188, "learning_rate": 9.711822891442887e-06, "loss": 141.3561, "step": 24470 }, { "epoch": 0.20250651445588783, "grad_norm": 1456.3428955078125, "learning_rate": 9.711350812372198e-06, "loss": 166.3525, "step": 24480 }, { "epoch": 0.20258923770525705, "grad_norm": 1364.7294921875, "learning_rate": 9.710878358440945e-06, "loss": 178.6096, "step": 24490 }, { "epoch": 0.2026719609546263, "grad_norm": 941.8969116210938, "learning_rate": 9.710405529686722e-06, "loss": 154.1988, "step": 24500 }, { "epoch": 0.20275468420399553, "grad_norm": 1231.3597412109375, "learning_rate": 9.709932326147147e-06, "loss": 109.8311, "step": 24510 }, { "epoch": 0.20283740745336476, "grad_norm": 981.5293579101562, "learning_rate": 9.709458747859874e-06, "loss": 106.3588, "step": 24520 }, { "epoch": 0.20292013070273401, "grad_norm": 882.2429809570312, "learning_rate": 9.708984794862581e-06, "loss": 106.4644, "step": 24530 }, { "epoch": 0.20300285395210324, "grad_norm": 1536.478271484375, "learning_rate": 9.708510467192981e-06, "loss": 115.3478, "step": 24540 }, { "epoch": 0.20308557720147247, "grad_norm": 480.1463928222656, "learning_rate": 9.70803576488881e-06, "loss": 131.8311, "step": 24550 }, { "epoch": 0.20316830045084172, "grad_norm": 751.3448486328125, "learning_rate": 9.707560687987843e-06, "loss": 100.0164, "step": 24560 }, { "epoch": 0.20325102370021095, "grad_norm": 983.8947143554688, "learning_rate": 9.707085236527873e-06, "loss": 124.7845, "step": 24570 }, { "epoch": 0.20333374694958017, "grad_norm": 778.719970703125, "learning_rate": 9.706609410546736e-06, "loss": 70.2131, "step": 24580 }, { "epoch": 0.20341647019894943, "grad_norm": 799.94580078125, "learning_rate": 9.706133210082288e-06, "loss": 107.5597, "step": 24590 }, { "epoch": 0.20349919344831865, "grad_norm": 775.4839477539062, "learning_rate": 9.705656635172418e-06, "loss": 123.562, "step": 24600 }, { "epoch": 0.20358191669768788, "grad_norm": 906.2435302734375, "learning_rate": 9.705179685855048e-06, "loss": 141.3586, "step": 24610 }, { "epoch": 0.2036646399470571, "grad_norm": 572.1375732421875, "learning_rate": 9.704702362168121e-06, "loss": 174.9464, "step": 24620 }, { "epoch": 0.20374736319642636, "grad_norm": 1068.9876708984375, "learning_rate": 9.704224664149621e-06, "loss": 122.0626, "step": 24630 }, { "epoch": 0.2038300864457956, "grad_norm": 606.3220825195312, "learning_rate": 9.703746591837552e-06, "loss": 109.6907, "step": 24640 }, { "epoch": 0.20391280969516482, "grad_norm": 1032.2161865234375, "learning_rate": 9.703268145269957e-06, "loss": 126.0382, "step": 24650 }, { "epoch": 0.20399553294453407, "grad_norm": 1022.0555419921875, "learning_rate": 9.702789324484898e-06, "loss": 188.598, "step": 24660 }, { "epoch": 0.2040782561939033, "grad_norm": 1047.5244140625, "learning_rate": 9.702310129520476e-06, "loss": 122.4435, "step": 24670 }, { "epoch": 0.20416097944327252, "grad_norm": 1051.2161865234375, "learning_rate": 9.701830560414817e-06, "loss": 144.9207, "step": 24680 }, { "epoch": 0.20424370269264178, "grad_norm": 1291.77099609375, "learning_rate": 9.701350617206081e-06, "loss": 141.4524, "step": 24690 }, { "epoch": 0.204326425942011, "grad_norm": 1048.1988525390625, "learning_rate": 9.700870299932453e-06, "loss": 133.1601, "step": 24700 }, { "epoch": 0.20440914919138023, "grad_norm": 1616.698486328125, "learning_rate": 9.700389608632146e-06, "loss": 159.5847, "step": 24710 }, { "epoch": 0.20449187244074948, "grad_norm": 863.6107788085938, "learning_rate": 9.699908543343413e-06, "loss": 135.6566, "step": 24720 }, { "epoch": 0.2045745956901187, "grad_norm": 1772.513916015625, "learning_rate": 9.699427104104525e-06, "loss": 202.6929, "step": 24730 }, { "epoch": 0.20465731893948794, "grad_norm": 822.7219848632812, "learning_rate": 9.698945290953789e-06, "loss": 140.5493, "step": 24740 }, { "epoch": 0.2047400421888572, "grad_norm": 811.2693481445312, "learning_rate": 9.698463103929542e-06, "loss": 81.8386, "step": 24750 }, { "epoch": 0.20482276543822642, "grad_norm": 530.8605346679688, "learning_rate": 9.69798054307015e-06, "loss": 106.7644, "step": 24760 }, { "epoch": 0.20490548868759564, "grad_norm": 2197.434326171875, "learning_rate": 9.697497608414007e-06, "loss": 189.2195, "step": 24770 }, { "epoch": 0.20498821193696487, "grad_norm": 1628.6551513671875, "learning_rate": 9.697014299999536e-06, "loss": 110.0945, "step": 24780 }, { "epoch": 0.20507093518633412, "grad_norm": 988.3732299804688, "learning_rate": 9.696530617865197e-06, "loss": 147.2918, "step": 24790 }, { "epoch": 0.20515365843570335, "grad_norm": 934.0083618164062, "learning_rate": 9.696046562049469e-06, "loss": 124.3846, "step": 24800 }, { "epoch": 0.20523638168507258, "grad_norm": 1064.4521484375, "learning_rate": 9.695562132590865e-06, "loss": 130.5005, "step": 24810 }, { "epoch": 0.20531910493444183, "grad_norm": 936.8753662109375, "learning_rate": 9.695077329527936e-06, "loss": 79.4409, "step": 24820 }, { "epoch": 0.20540182818381106, "grad_norm": 2023.431396484375, "learning_rate": 9.694592152899249e-06, "loss": 135.3671, "step": 24830 }, { "epoch": 0.20548455143318028, "grad_norm": 1526.9307861328125, "learning_rate": 9.694106602743411e-06, "loss": 141.677, "step": 24840 }, { "epoch": 0.20556727468254954, "grad_norm": 1296.6025390625, "learning_rate": 9.693620679099055e-06, "loss": 106.9513, "step": 24850 }, { "epoch": 0.20564999793191877, "grad_norm": 675.1536865234375, "learning_rate": 9.693134382004839e-06, "loss": 122.0934, "step": 24860 }, { "epoch": 0.205732721181288, "grad_norm": 1233.307373046875, "learning_rate": 9.69264771149946e-06, "loss": 152.0633, "step": 24870 }, { "epoch": 0.20581544443065725, "grad_norm": 2084.801025390625, "learning_rate": 9.692160667621639e-06, "loss": 191.7889, "step": 24880 }, { "epoch": 0.20589816768002647, "grad_norm": 1063.7586669921875, "learning_rate": 9.69167325041013e-06, "loss": 98.7458, "step": 24890 }, { "epoch": 0.2059808909293957, "grad_norm": 1160.4176025390625, "learning_rate": 9.69118545990371e-06, "loss": 136.4698, "step": 24900 }, { "epoch": 0.20606361417876495, "grad_norm": 1240.2447509765625, "learning_rate": 9.690697296141194e-06, "loss": 131.5934, "step": 24910 }, { "epoch": 0.20614633742813418, "grad_norm": 1285.3697509765625, "learning_rate": 9.690208759161418e-06, "loss": 143.4328, "step": 24920 }, { "epoch": 0.2062290606775034, "grad_norm": 899.1880493164062, "learning_rate": 9.689719849003261e-06, "loss": 150.501, "step": 24930 }, { "epoch": 0.20631178392687266, "grad_norm": 1172.1856689453125, "learning_rate": 9.689230565705617e-06, "loss": 88.1189, "step": 24940 }, { "epoch": 0.2063945071762419, "grad_norm": 940.1471557617188, "learning_rate": 9.688740909307416e-06, "loss": 126.9263, "step": 24950 }, { "epoch": 0.2064772304256111, "grad_norm": 1031.7501220703125, "learning_rate": 9.68825087984762e-06, "loss": 145.9387, "step": 24960 }, { "epoch": 0.20655995367498034, "grad_norm": 747.2893676757812, "learning_rate": 9.687760477365217e-06, "loss": 111.3116, "step": 24970 }, { "epoch": 0.2066426769243496, "grad_norm": 1009.5612182617188, "learning_rate": 9.687269701899228e-06, "loss": 122.0988, "step": 24980 }, { "epoch": 0.20672540017371882, "grad_norm": 1089.2601318359375, "learning_rate": 9.6867785534887e-06, "loss": 118.2881, "step": 24990 }, { "epoch": 0.20680812342308805, "grad_norm": 1918.05908203125, "learning_rate": 9.686287032172712e-06, "loss": 119.0276, "step": 25000 }, { "epoch": 0.2068908466724573, "grad_norm": 1754.23486328125, "learning_rate": 9.685795137990372e-06, "loss": 149.8688, "step": 25010 }, { "epoch": 0.20697356992182653, "grad_norm": 701.3511352539062, "learning_rate": 9.685302870980819e-06, "loss": 111.5625, "step": 25020 }, { "epoch": 0.20705629317119575, "grad_norm": 1214.149658203125, "learning_rate": 9.684810231183218e-06, "loss": 104.316, "step": 25030 }, { "epoch": 0.207139016420565, "grad_norm": 1371.7374267578125, "learning_rate": 9.684317218636767e-06, "loss": 137.7989, "step": 25040 }, { "epoch": 0.20722173966993424, "grad_norm": 976.9849243164062, "learning_rate": 9.683823833380692e-06, "loss": 107.9419, "step": 25050 }, { "epoch": 0.20730446291930346, "grad_norm": 2080.160400390625, "learning_rate": 9.683330075454252e-06, "loss": 134.3795, "step": 25060 }, { "epoch": 0.20738718616867272, "grad_norm": 1066.323974609375, "learning_rate": 9.68283594489673e-06, "loss": 113.7562, "step": 25070 }, { "epoch": 0.20746990941804194, "grad_norm": 1623.5059814453125, "learning_rate": 9.682341441747446e-06, "loss": 124.4294, "step": 25080 }, { "epoch": 0.20755263266741117, "grad_norm": 1231.952880859375, "learning_rate": 9.68184656604574e-06, "loss": 168.1638, "step": 25090 }, { "epoch": 0.20763535591678042, "grad_norm": 604.9308471679688, "learning_rate": 9.681351317830991e-06, "loss": 150.3729, "step": 25100 }, { "epoch": 0.20771807916614965, "grad_norm": 915.5381469726562, "learning_rate": 9.680855697142601e-06, "loss": 89.7146, "step": 25110 }, { "epoch": 0.20780080241551888, "grad_norm": 517.771484375, "learning_rate": 9.680359704020005e-06, "loss": 110.3232, "step": 25120 }, { "epoch": 0.20788352566488813, "grad_norm": 999.2185668945312, "learning_rate": 9.67986333850267e-06, "loss": 155.6853, "step": 25130 }, { "epoch": 0.20796624891425736, "grad_norm": 837.3745727539062, "learning_rate": 9.679366600630085e-06, "loss": 123.4482, "step": 25140 }, { "epoch": 0.20804897216362658, "grad_norm": 1082.25146484375, "learning_rate": 9.678869490441775e-06, "loss": 92.413, "step": 25150 }, { "epoch": 0.2081316954129958, "grad_norm": 1187.2119140625, "learning_rate": 9.678372007977292e-06, "loss": 130.2228, "step": 25160 }, { "epoch": 0.20821441866236506, "grad_norm": 1067.64306640625, "learning_rate": 9.67787415327622e-06, "loss": 106.1636, "step": 25170 }, { "epoch": 0.2082971419117343, "grad_norm": 1119.31640625, "learning_rate": 9.67737592637817e-06, "loss": 125.416, "step": 25180 }, { "epoch": 0.20837986516110352, "grad_norm": 1132.6695556640625, "learning_rate": 9.676877327322785e-06, "loss": 121.1855, "step": 25190 }, { "epoch": 0.20846258841047277, "grad_norm": 921.9434204101562, "learning_rate": 9.676378356149733e-06, "loss": 119.0814, "step": 25200 }, { "epoch": 0.208545311659842, "grad_norm": 1111.28857421875, "learning_rate": 9.675879012898719e-06, "loss": 112.7059, "step": 25210 }, { "epoch": 0.20862803490921122, "grad_norm": 952.7010498046875, "learning_rate": 9.67537929760947e-06, "loss": 135.5189, "step": 25220 }, { "epoch": 0.20871075815858048, "grad_norm": 668.4132080078125, "learning_rate": 9.674879210321747e-06, "loss": 126.6339, "step": 25230 }, { "epoch": 0.2087934814079497, "grad_norm": 1562.3443603515625, "learning_rate": 9.67437875107534e-06, "loss": 157.3701, "step": 25240 }, { "epoch": 0.20887620465731893, "grad_norm": 787.549072265625, "learning_rate": 9.673877919910069e-06, "loss": 144.3811, "step": 25250 }, { "epoch": 0.20895892790668819, "grad_norm": 1168.210693359375, "learning_rate": 9.673376716865781e-06, "loss": 147.3342, "step": 25260 }, { "epoch": 0.2090416511560574, "grad_norm": 922.6990356445312, "learning_rate": 9.672875141982358e-06, "loss": 134.1699, "step": 25270 }, { "epoch": 0.20912437440542664, "grad_norm": 920.8440551757812, "learning_rate": 9.672373195299704e-06, "loss": 90.3388, "step": 25280 }, { "epoch": 0.2092070976547959, "grad_norm": 742.2246704101562, "learning_rate": 9.67187087685776e-06, "loss": 127.1734, "step": 25290 }, { "epoch": 0.20928982090416512, "grad_norm": 1413.235107421875, "learning_rate": 9.671368186696488e-06, "loss": 121.8572, "step": 25300 }, { "epoch": 0.20937254415353435, "grad_norm": 1089.089111328125, "learning_rate": 9.670865124855889e-06, "loss": 155.726, "step": 25310 }, { "epoch": 0.2094552674029036, "grad_norm": 1251.3277587890625, "learning_rate": 9.67036169137599e-06, "loss": 134.8554, "step": 25320 }, { "epoch": 0.20953799065227283, "grad_norm": 975.4923706054688, "learning_rate": 9.669857886296842e-06, "loss": 137.952, "step": 25330 }, { "epoch": 0.20962071390164205, "grad_norm": 1131.67822265625, "learning_rate": 9.669353709658537e-06, "loss": 119.2049, "step": 25340 }, { "epoch": 0.20970343715101128, "grad_norm": 637.6780395507812, "learning_rate": 9.668849161501186e-06, "loss": 104.5548, "step": 25350 }, { "epoch": 0.20978616040038053, "grad_norm": 1121.5142822265625, "learning_rate": 9.668344241864934e-06, "loss": 115.9487, "step": 25360 }, { "epoch": 0.20986888364974976, "grad_norm": 937.0314331054688, "learning_rate": 9.667838950789957e-06, "loss": 112.8814, "step": 25370 }, { "epoch": 0.209951606899119, "grad_norm": 1264.4849853515625, "learning_rate": 9.667333288316454e-06, "loss": 135.4153, "step": 25380 }, { "epoch": 0.21003433014848824, "grad_norm": 495.7436828613281, "learning_rate": 9.666827254484663e-06, "loss": 111.5311, "step": 25390 }, { "epoch": 0.21011705339785747, "grad_norm": 853.3590087890625, "learning_rate": 9.666320849334846e-06, "loss": 102.2723, "step": 25400 }, { "epoch": 0.2101997766472267, "grad_norm": 1231.4791259765625, "learning_rate": 9.665814072907293e-06, "loss": 118.1443, "step": 25410 }, { "epoch": 0.21028249989659595, "grad_norm": 688.8372192382812, "learning_rate": 9.665306925242329e-06, "loss": 120.3173, "step": 25420 }, { "epoch": 0.21036522314596517, "grad_norm": 1288.0115966796875, "learning_rate": 9.664799406380302e-06, "loss": 129.2124, "step": 25430 }, { "epoch": 0.2104479463953344, "grad_norm": 734.3385009765625, "learning_rate": 9.664291516361597e-06, "loss": 141.6067, "step": 25440 }, { "epoch": 0.21053066964470366, "grad_norm": 808.9627685546875, "learning_rate": 9.663783255226622e-06, "loss": 128.5043, "step": 25450 }, { "epoch": 0.21061339289407288, "grad_norm": 2131.609619140625, "learning_rate": 9.663274623015816e-06, "loss": 130.3591, "step": 25460 }, { "epoch": 0.2106961161434421, "grad_norm": 1364.452880859375, "learning_rate": 9.662765619769651e-06, "loss": 186.9397, "step": 25470 }, { "epoch": 0.21077883939281136, "grad_norm": 1547.8067626953125, "learning_rate": 9.662256245528622e-06, "loss": 130.2646, "step": 25480 }, { "epoch": 0.2108615626421806, "grad_norm": 760.1724853515625, "learning_rate": 9.661746500333265e-06, "loss": 107.9425, "step": 25490 }, { "epoch": 0.21094428589154982, "grad_norm": 691.5214233398438, "learning_rate": 9.66123638422413e-06, "loss": 131.0554, "step": 25500 }, { "epoch": 0.21102700914091904, "grad_norm": 1053.4617919921875, "learning_rate": 9.66072589724181e-06, "loss": 139.2496, "step": 25510 }, { "epoch": 0.2111097323902883, "grad_norm": 903.3119506835938, "learning_rate": 9.66021503942692e-06, "loss": 103.5156, "step": 25520 }, { "epoch": 0.21119245563965752, "grad_norm": 1562.44482421875, "learning_rate": 9.659703810820105e-06, "loss": 154.1499, "step": 25530 }, { "epoch": 0.21127517888902675, "grad_norm": 724.2481079101562, "learning_rate": 9.659192211462043e-06, "loss": 122.7316, "step": 25540 }, { "epoch": 0.211357902138396, "grad_norm": 1171.6414794921875, "learning_rate": 9.658680241393441e-06, "loss": 112.3694, "step": 25550 }, { "epoch": 0.21144062538776523, "grad_norm": 687.9328002929688, "learning_rate": 9.658167900655032e-06, "loss": 85.1699, "step": 25560 }, { "epoch": 0.21152334863713446, "grad_norm": 1203.814208984375, "learning_rate": 9.657655189287582e-06, "loss": 111.8283, "step": 25570 }, { "epoch": 0.2116060718865037, "grad_norm": 2333.49365234375, "learning_rate": 9.657142107331883e-06, "loss": 168.9521, "step": 25580 }, { "epoch": 0.21168879513587294, "grad_norm": 919.4609375, "learning_rate": 9.65662865482876e-06, "loss": 110.3846, "step": 25590 }, { "epoch": 0.21177151838524216, "grad_norm": 1061.5003662109375, "learning_rate": 9.656114831819067e-06, "loss": 133.6754, "step": 25600 }, { "epoch": 0.21185424163461142, "grad_norm": 498.1741638183594, "learning_rate": 9.655600638343685e-06, "loss": 109.6762, "step": 25610 }, { "epoch": 0.21193696488398064, "grad_norm": 946.8280639648438, "learning_rate": 9.655086074443527e-06, "loss": 183.0106, "step": 25620 }, { "epoch": 0.21201968813334987, "grad_norm": 1837.9036865234375, "learning_rate": 9.654571140159534e-06, "loss": 122.9791, "step": 25630 }, { "epoch": 0.21210241138271912, "grad_norm": 1461.47900390625, "learning_rate": 9.654055835532676e-06, "loss": 191.4103, "step": 25640 }, { "epoch": 0.21218513463208835, "grad_norm": 820.8297119140625, "learning_rate": 9.653540160603956e-06, "loss": 115.1019, "step": 25650 }, { "epoch": 0.21226785788145758, "grad_norm": 1157.8956298828125, "learning_rate": 9.653024115414402e-06, "loss": 142.3736, "step": 25660 }, { "epoch": 0.21235058113082683, "grad_norm": 917.7392578125, "learning_rate": 9.652507700005072e-06, "loss": 130.4384, "step": 25670 }, { "epoch": 0.21243330438019606, "grad_norm": 1544.7633056640625, "learning_rate": 9.651990914417057e-06, "loss": 135.7345, "step": 25680 }, { "epoch": 0.21251602762956529, "grad_norm": 768.8818359375, "learning_rate": 9.651473758691477e-06, "loss": 99.1156, "step": 25690 }, { "epoch": 0.2125987508789345, "grad_norm": 1055.4837646484375, "learning_rate": 9.650956232869475e-06, "loss": 113.1447, "step": 25700 }, { "epoch": 0.21268147412830377, "grad_norm": 1085.100341796875, "learning_rate": 9.650438336992231e-06, "loss": 156.4204, "step": 25710 }, { "epoch": 0.212764197377673, "grad_norm": 535.8530883789062, "learning_rate": 9.64992007110095e-06, "loss": 113.1042, "step": 25720 }, { "epoch": 0.21284692062704222, "grad_norm": 1140.1822509765625, "learning_rate": 9.64940143523687e-06, "loss": 134.6167, "step": 25730 }, { "epoch": 0.21292964387641147, "grad_norm": 1358.8885498046875, "learning_rate": 9.648882429441258e-06, "loss": 133.8381, "step": 25740 }, { "epoch": 0.2130123671257807, "grad_norm": 1481.0440673828125, "learning_rate": 9.648363053755406e-06, "loss": 120.2028, "step": 25750 }, { "epoch": 0.21309509037514993, "grad_norm": 1080.2623291015625, "learning_rate": 9.647843308220636e-06, "loss": 105.3537, "step": 25760 }, { "epoch": 0.21317781362451918, "grad_norm": 712.189453125, "learning_rate": 9.647323192878306e-06, "loss": 101.1071, "step": 25770 }, { "epoch": 0.2132605368738884, "grad_norm": 1282.9239501953125, "learning_rate": 9.646802707769798e-06, "loss": 121.2276, "step": 25780 }, { "epoch": 0.21334326012325763, "grad_norm": 558.678955078125, "learning_rate": 9.646281852936525e-06, "loss": 92.6775, "step": 25790 }, { "epoch": 0.2134259833726269, "grad_norm": 994.920654296875, "learning_rate": 9.64576062841993e-06, "loss": 124.3649, "step": 25800 }, { "epoch": 0.2135087066219961, "grad_norm": 1420.953857421875, "learning_rate": 9.64523903426148e-06, "loss": 122.571, "step": 25810 }, { "epoch": 0.21359142987136534, "grad_norm": 859.2244873046875, "learning_rate": 9.64471707050268e-06, "loss": 126.6968, "step": 25820 }, { "epoch": 0.2136741531207346, "grad_norm": 1234.9310302734375, "learning_rate": 9.644194737185058e-06, "loss": 139.6012, "step": 25830 }, { "epoch": 0.21375687637010382, "grad_norm": 1804.944091796875, "learning_rate": 9.643672034350177e-06, "loss": 150.6442, "step": 25840 }, { "epoch": 0.21383959961947305, "grad_norm": 1387.7882080078125, "learning_rate": 9.643148962039622e-06, "loss": 93.8409, "step": 25850 }, { "epoch": 0.2139223228688423, "grad_norm": 1455.9395751953125, "learning_rate": 9.642625520295014e-06, "loss": 174.9808, "step": 25860 }, { "epoch": 0.21400504611821153, "grad_norm": 1184.424072265625, "learning_rate": 9.642101709158001e-06, "loss": 129.4004, "step": 25870 }, { "epoch": 0.21408776936758075, "grad_norm": 1059.5274658203125, "learning_rate": 9.641577528670257e-06, "loss": 129.7015, "step": 25880 }, { "epoch": 0.21417049261694998, "grad_norm": 753.4532470703125, "learning_rate": 9.641052978873494e-06, "loss": 109.8265, "step": 25890 }, { "epoch": 0.21425321586631924, "grad_norm": 676.7515258789062, "learning_rate": 9.640528059809442e-06, "loss": 74.3417, "step": 25900 }, { "epoch": 0.21433593911568846, "grad_norm": 2531.762939453125, "learning_rate": 9.640002771519872e-06, "loss": 133.8275, "step": 25910 }, { "epoch": 0.2144186623650577, "grad_norm": 1403.41943359375, "learning_rate": 9.639477114046575e-06, "loss": 164.8685, "step": 25920 }, { "epoch": 0.21450138561442694, "grad_norm": 922.8995971679688, "learning_rate": 9.638951087431376e-06, "loss": 114.9469, "step": 25930 }, { "epoch": 0.21458410886379617, "grad_norm": 1005.201416015625, "learning_rate": 9.638424691716129e-06, "loss": 144.4065, "step": 25940 }, { "epoch": 0.2146668321131654, "grad_norm": 917.942626953125, "learning_rate": 9.637897926942716e-06, "loss": 140.8008, "step": 25950 }, { "epoch": 0.21474955536253465, "grad_norm": 1009.6735229492188, "learning_rate": 9.637370793153051e-06, "loss": 132.449, "step": 25960 }, { "epoch": 0.21483227861190388, "grad_norm": 904.17529296875, "learning_rate": 9.636843290389076e-06, "loss": 114.3653, "step": 25970 }, { "epoch": 0.2149150018612731, "grad_norm": 1091.709716796875, "learning_rate": 9.636315418692759e-06, "loss": 108.0948, "step": 25980 }, { "epoch": 0.21499772511064236, "grad_norm": 2188.771240234375, "learning_rate": 9.635787178106102e-06, "loss": 107.5951, "step": 25990 }, { "epoch": 0.21508044836001158, "grad_norm": 1075.4591064453125, "learning_rate": 9.635258568671135e-06, "loss": 137.4553, "step": 26000 }, { "epoch": 0.2151631716093808, "grad_norm": 916.110107421875, "learning_rate": 9.634729590429917e-06, "loss": 123.9193, "step": 26010 }, { "epoch": 0.21524589485875006, "grad_norm": 1345.30615234375, "learning_rate": 9.634200243424535e-06, "loss": 139.9196, "step": 26020 }, { "epoch": 0.2153286181081193, "grad_norm": 1103.3697509765625, "learning_rate": 9.633670527697108e-06, "loss": 130.9413, "step": 26030 }, { "epoch": 0.21541134135748852, "grad_norm": 1355.6485595703125, "learning_rate": 9.633140443289784e-06, "loss": 190.8187, "step": 26040 }, { "epoch": 0.21549406460685774, "grad_norm": 575.96142578125, "learning_rate": 9.632609990244737e-06, "loss": 103.7051, "step": 26050 }, { "epoch": 0.215576787856227, "grad_norm": 779.9686889648438, "learning_rate": 9.632079168604175e-06, "loss": 109.2936, "step": 26060 }, { "epoch": 0.21565951110559622, "grad_norm": 1010.6686401367188, "learning_rate": 9.63154797841033e-06, "loss": 159.4392, "step": 26070 }, { "epoch": 0.21574223435496545, "grad_norm": 826.314208984375, "learning_rate": 9.63101641970547e-06, "loss": 129.3246, "step": 26080 }, { "epoch": 0.2158249576043347, "grad_norm": 898.4963989257812, "learning_rate": 9.630484492531886e-06, "loss": 129.947, "step": 26090 }, { "epoch": 0.21590768085370393, "grad_norm": 843.03857421875, "learning_rate": 9.629952196931902e-06, "loss": 113.0155, "step": 26100 }, { "epoch": 0.21599040410307316, "grad_norm": 660.1622314453125, "learning_rate": 9.629419532947872e-06, "loss": 156.4598, "step": 26110 }, { "epoch": 0.2160731273524424, "grad_norm": 1190.5133056640625, "learning_rate": 9.628886500622174e-06, "loss": 128.8638, "step": 26120 }, { "epoch": 0.21615585060181164, "grad_norm": 924.6058959960938, "learning_rate": 9.62835309999722e-06, "loss": 99.8876, "step": 26130 }, { "epoch": 0.21623857385118087, "grad_norm": 1131.3111572265625, "learning_rate": 9.627819331115453e-06, "loss": 126.6344, "step": 26140 }, { "epoch": 0.21632129710055012, "grad_norm": 784.7423706054688, "learning_rate": 9.627285194019342e-06, "loss": 102.5163, "step": 26150 }, { "epoch": 0.21640402034991935, "grad_norm": 1271.62890625, "learning_rate": 9.626750688751382e-06, "loss": 115.6172, "step": 26160 }, { "epoch": 0.21648674359928857, "grad_norm": 1133.4263916015625, "learning_rate": 9.626215815354104e-06, "loss": 98.0378, "step": 26170 }, { "epoch": 0.21656946684865783, "grad_norm": 822.8490600585938, "learning_rate": 9.625680573870067e-06, "loss": 123.6515, "step": 26180 }, { "epoch": 0.21665219009802705, "grad_norm": 796.753662109375, "learning_rate": 9.625144964341853e-06, "loss": 127.32, "step": 26190 }, { "epoch": 0.21673491334739628, "grad_norm": 976.165771484375, "learning_rate": 9.624608986812082e-06, "loss": 113.2206, "step": 26200 }, { "epoch": 0.21681763659676553, "grad_norm": 1549.4820556640625, "learning_rate": 9.624072641323398e-06, "loss": 121.0571, "step": 26210 }, { "epoch": 0.21690035984613476, "grad_norm": 1164.861328125, "learning_rate": 9.623535927918474e-06, "loss": 168.508, "step": 26220 }, { "epoch": 0.216983083095504, "grad_norm": 1254.7081298828125, "learning_rate": 9.622998846640018e-06, "loss": 114.7848, "step": 26230 }, { "epoch": 0.2170658063448732, "grad_norm": 744.2569580078125, "learning_rate": 9.62246139753076e-06, "loss": 125.7201, "step": 26240 }, { "epoch": 0.21714852959424247, "grad_norm": 1193.7801513671875, "learning_rate": 9.621923580633462e-06, "loss": 109.2309, "step": 26250 }, { "epoch": 0.2172312528436117, "grad_norm": 933.6427612304688, "learning_rate": 9.621385395990915e-06, "loss": 137.9964, "step": 26260 }, { "epoch": 0.21731397609298092, "grad_norm": 1021.1292724609375, "learning_rate": 9.620846843645944e-06, "loss": 105.2249, "step": 26270 }, { "epoch": 0.21739669934235017, "grad_norm": 1435.4403076171875, "learning_rate": 9.620307923641395e-06, "loss": 120.3464, "step": 26280 }, { "epoch": 0.2174794225917194, "grad_norm": 1647.5338134765625, "learning_rate": 9.61976863602015e-06, "loss": 115.3988, "step": 26290 }, { "epoch": 0.21756214584108863, "grad_norm": 729.9489135742188, "learning_rate": 9.619228980825114e-06, "loss": 171.7545, "step": 26300 }, { "epoch": 0.21764486909045788, "grad_norm": 1271.6209716796875, "learning_rate": 9.61868895809923e-06, "loss": 120.3744, "step": 26310 }, { "epoch": 0.2177275923398271, "grad_norm": 844.6718139648438, "learning_rate": 9.618148567885462e-06, "loss": 142.1199, "step": 26320 }, { "epoch": 0.21781031558919633, "grad_norm": 837.2586669921875, "learning_rate": 9.617607810226806e-06, "loss": 154.3331, "step": 26330 }, { "epoch": 0.2178930388385656, "grad_norm": 1400.4326171875, "learning_rate": 9.61706668516629e-06, "loss": 124.3974, "step": 26340 }, { "epoch": 0.21797576208793482, "grad_norm": 1115.8116455078125, "learning_rate": 9.616525192746965e-06, "loss": 105.8458, "step": 26350 }, { "epoch": 0.21805848533730404, "grad_norm": 1421.916748046875, "learning_rate": 9.61598333301192e-06, "loss": 156.9738, "step": 26360 }, { "epoch": 0.2181412085866733, "grad_norm": 798.7007446289062, "learning_rate": 9.615441106004264e-06, "loss": 111.3176, "step": 26370 }, { "epoch": 0.21822393183604252, "grad_norm": 1311.8187255859375, "learning_rate": 9.614898511767142e-06, "loss": 112.8957, "step": 26380 }, { "epoch": 0.21830665508541175, "grad_norm": 1265.518798828125, "learning_rate": 9.614355550343724e-06, "loss": 97.6749, "step": 26390 }, { "epoch": 0.218389378334781, "grad_norm": 1181.8995361328125, "learning_rate": 9.613812221777212e-06, "loss": 126.788, "step": 26400 }, { "epoch": 0.21847210158415023, "grad_norm": 921.5441284179688, "learning_rate": 9.613268526110838e-06, "loss": 126.4273, "step": 26410 }, { "epoch": 0.21855482483351946, "grad_norm": 2307.983642578125, "learning_rate": 9.612724463387857e-06, "loss": 124.0576, "step": 26420 }, { "epoch": 0.21863754808288868, "grad_norm": 1391.1617431640625, "learning_rate": 9.612180033651561e-06, "loss": 122.5418, "step": 26430 }, { "epoch": 0.21872027133225794, "grad_norm": 883.1130981445312, "learning_rate": 9.611635236945267e-06, "loss": 105.9835, "step": 26440 }, { "epoch": 0.21880299458162716, "grad_norm": 1083.864990234375, "learning_rate": 9.61109007331232e-06, "loss": 124.8244, "step": 26450 }, { "epoch": 0.2188857178309964, "grad_norm": 1159.39697265625, "learning_rate": 9.610544542796101e-06, "loss": 120.3728, "step": 26460 }, { "epoch": 0.21896844108036564, "grad_norm": 1433.8590087890625, "learning_rate": 9.609998645440011e-06, "loss": 132.6535, "step": 26470 }, { "epoch": 0.21905116432973487, "grad_norm": 1183.7196044921875, "learning_rate": 9.609452381287486e-06, "loss": 133.8586, "step": 26480 }, { "epoch": 0.2191338875791041, "grad_norm": 859.5003662109375, "learning_rate": 9.608905750381988e-06, "loss": 101.3727, "step": 26490 }, { "epoch": 0.21921661082847335, "grad_norm": 800.9340209960938, "learning_rate": 9.608358752767013e-06, "loss": 142.6461, "step": 26500 }, { "epoch": 0.21929933407784258, "grad_norm": 1471.0047607421875, "learning_rate": 9.60781138848608e-06, "loss": 110.0766, "step": 26510 }, { "epoch": 0.2193820573272118, "grad_norm": 954.5092163085938, "learning_rate": 9.607263657582744e-06, "loss": 110.3453, "step": 26520 }, { "epoch": 0.21946478057658106, "grad_norm": 847.3948974609375, "learning_rate": 9.60671556010058e-06, "loss": 119.9163, "step": 26530 }, { "epoch": 0.21954750382595029, "grad_norm": 559.4891357421875, "learning_rate": 9.606167096083205e-06, "loss": 106.7365, "step": 26540 }, { "epoch": 0.2196302270753195, "grad_norm": 1348.5836181640625, "learning_rate": 9.60561826557425e-06, "loss": 148.4306, "step": 26550 }, { "epoch": 0.21971295032468877, "grad_norm": 912.4396362304688, "learning_rate": 9.60506906861739e-06, "loss": 116.1079, "step": 26560 }, { "epoch": 0.219795673574058, "grad_norm": 939.4193115234375, "learning_rate": 9.604519505256316e-06, "loss": 127.2096, "step": 26570 }, { "epoch": 0.21987839682342722, "grad_norm": 807.13623046875, "learning_rate": 9.603969575534757e-06, "loss": 102.2194, "step": 26580 }, { "epoch": 0.21996112007279647, "grad_norm": 695.100830078125, "learning_rate": 9.60341927949647e-06, "loss": 113.9714, "step": 26590 }, { "epoch": 0.2200438433221657, "grad_norm": 456.3763122558594, "learning_rate": 9.602868617185238e-06, "loss": 105.249, "step": 26600 }, { "epoch": 0.22012656657153493, "grad_norm": 1128.632568359375, "learning_rate": 9.602317588644872e-06, "loss": 104.7491, "step": 26610 }, { "epoch": 0.22020928982090415, "grad_norm": 948.9160766601562, "learning_rate": 9.601766193919217e-06, "loss": 104.4173, "step": 26620 }, { "epoch": 0.2202920130702734, "grad_norm": 725.5731201171875, "learning_rate": 9.601214433052147e-06, "loss": 103.2853, "step": 26630 }, { "epoch": 0.22037473631964263, "grad_norm": 979.1326293945312, "learning_rate": 9.600662306087562e-06, "loss": 122.0349, "step": 26640 }, { "epoch": 0.22045745956901186, "grad_norm": 772.8959350585938, "learning_rate": 9.600109813069389e-06, "loss": 118.9232, "step": 26650 }, { "epoch": 0.22054018281838111, "grad_norm": 879.559814453125, "learning_rate": 9.599556954041591e-06, "loss": 154.7716, "step": 26660 }, { "epoch": 0.22062290606775034, "grad_norm": 852.1553344726562, "learning_rate": 9.599003729048157e-06, "loss": 115.1464, "step": 26670 }, { "epoch": 0.22070562931711957, "grad_norm": 1091.1187744140625, "learning_rate": 9.598450138133101e-06, "loss": 124.5991, "step": 26680 }, { "epoch": 0.22078835256648882, "grad_norm": 1003.1347045898438, "learning_rate": 9.597896181340471e-06, "loss": 133.6112, "step": 26690 }, { "epoch": 0.22087107581585805, "grad_norm": 1310.1240234375, "learning_rate": 9.597341858714344e-06, "loss": 120.8151, "step": 26700 }, { "epoch": 0.22095379906522727, "grad_norm": 973.50439453125, "learning_rate": 9.596787170298824e-06, "loss": 132.3573, "step": 26710 }, { "epoch": 0.22103652231459653, "grad_norm": 838.1239013671875, "learning_rate": 9.596232116138047e-06, "loss": 135.2263, "step": 26720 }, { "epoch": 0.22111924556396576, "grad_norm": 776.5439453125, "learning_rate": 9.595676696276173e-06, "loss": 140.2891, "step": 26730 }, { "epoch": 0.22120196881333498, "grad_norm": 898.1226806640625, "learning_rate": 9.595120910757396e-06, "loss": 156.5662, "step": 26740 }, { "epoch": 0.22128469206270424, "grad_norm": 988.5482788085938, "learning_rate": 9.594564759625936e-06, "loss": 119.9368, "step": 26750 }, { "epoch": 0.22136741531207346, "grad_norm": 2739.469482421875, "learning_rate": 9.594008242926046e-06, "loss": 117.0178, "step": 26760 }, { "epoch": 0.2214501385614427, "grad_norm": 1172.0687255859375, "learning_rate": 9.593451360702003e-06, "loss": 109.8631, "step": 26770 }, { "epoch": 0.22153286181081192, "grad_norm": 1231.6884765625, "learning_rate": 9.592894112998115e-06, "loss": 123.6822, "step": 26780 }, { "epoch": 0.22161558506018117, "grad_norm": 2576.349609375, "learning_rate": 9.592336499858721e-06, "loss": 134.2115, "step": 26790 }, { "epoch": 0.2216983083095504, "grad_norm": 851.02197265625, "learning_rate": 9.59177852132819e-06, "loss": 130.6942, "step": 26800 }, { "epoch": 0.22178103155891962, "grad_norm": 1379.3516845703125, "learning_rate": 9.591220177450912e-06, "loss": 148.0982, "step": 26810 }, { "epoch": 0.22186375480828888, "grad_norm": 1033.928955078125, "learning_rate": 9.590661468271319e-06, "loss": 99.2162, "step": 26820 }, { "epoch": 0.2219464780576581, "grad_norm": 987.3099365234375, "learning_rate": 9.59010239383386e-06, "loss": 138.9152, "step": 26830 }, { "epoch": 0.22202920130702733, "grad_norm": 687.860595703125, "learning_rate": 9.589542954183018e-06, "loss": 112.7026, "step": 26840 }, { "epoch": 0.22211192455639658, "grad_norm": 732.00537109375, "learning_rate": 9.588983149363307e-06, "loss": 123.2144, "step": 26850 }, { "epoch": 0.2221946478057658, "grad_norm": 1079.510009765625, "learning_rate": 9.588422979419267e-06, "loss": 87.7841, "step": 26860 }, { "epoch": 0.22227737105513504, "grad_norm": 1468.6038818359375, "learning_rate": 9.587862444395471e-06, "loss": 136.3903, "step": 26870 }, { "epoch": 0.2223600943045043, "grad_norm": 889.5414428710938, "learning_rate": 9.587301544336513e-06, "loss": 115.5707, "step": 26880 }, { "epoch": 0.22244281755387352, "grad_norm": 590.138916015625, "learning_rate": 9.586740279287024e-06, "loss": 117.9152, "step": 26890 }, { "epoch": 0.22252554080324274, "grad_norm": 1332.5377197265625, "learning_rate": 9.586178649291664e-06, "loss": 131.7125, "step": 26900 }, { "epoch": 0.222608264052612, "grad_norm": 1022.7705688476562, "learning_rate": 9.585616654395113e-06, "loss": 115.9927, "step": 26910 }, { "epoch": 0.22269098730198122, "grad_norm": 847.87255859375, "learning_rate": 9.585054294642093e-06, "loss": 169.3321, "step": 26920 }, { "epoch": 0.22277371055135045, "grad_norm": 1865.4990234375, "learning_rate": 9.584491570077343e-06, "loss": 128.1739, "step": 26930 }, { "epoch": 0.2228564338007197, "grad_norm": 865.5852661132812, "learning_rate": 9.58392848074564e-06, "loss": 119.8757, "step": 26940 }, { "epoch": 0.22293915705008893, "grad_norm": 1116.7584228515625, "learning_rate": 9.583365026691785e-06, "loss": 111.9066, "step": 26950 }, { "epoch": 0.22302188029945816, "grad_norm": 1089.7393798828125, "learning_rate": 9.58280120796061e-06, "loss": 123.0073, "step": 26960 }, { "epoch": 0.22310460354882738, "grad_norm": 854.9454956054688, "learning_rate": 9.582237024596974e-06, "loss": 131.7255, "step": 26970 }, { "epoch": 0.22318732679819664, "grad_norm": 840.7254028320312, "learning_rate": 9.581672476645768e-06, "loss": 134.3853, "step": 26980 }, { "epoch": 0.22327005004756587, "grad_norm": 1240.8424072265625, "learning_rate": 9.58110756415191e-06, "loss": 127.8951, "step": 26990 }, { "epoch": 0.2233527732969351, "grad_norm": 1180.210205078125, "learning_rate": 9.580542287160348e-06, "loss": 127.6457, "step": 27000 }, { "epoch": 0.22343549654630435, "grad_norm": 1015.6724853515625, "learning_rate": 9.579976645716058e-06, "loss": 130.7046, "step": 27010 }, { "epoch": 0.22351821979567357, "grad_norm": 1194.459228515625, "learning_rate": 9.579410639864046e-06, "loss": 133.5198, "step": 27020 }, { "epoch": 0.2236009430450428, "grad_norm": 1310.887451171875, "learning_rate": 9.578844269649345e-06, "loss": 123.0892, "step": 27030 }, { "epoch": 0.22368366629441205, "grad_norm": 764.6407470703125, "learning_rate": 9.578277535117022e-06, "loss": 118.5598, "step": 27040 }, { "epoch": 0.22376638954378128, "grad_norm": 1107.6348876953125, "learning_rate": 9.577710436312164e-06, "loss": 113.8774, "step": 27050 }, { "epoch": 0.2238491127931505, "grad_norm": 1522.5843505859375, "learning_rate": 9.577142973279896e-06, "loss": 137.2552, "step": 27060 }, { "epoch": 0.22393183604251976, "grad_norm": 984.351318359375, "learning_rate": 9.576575146065369e-06, "loss": 128.6748, "step": 27070 }, { "epoch": 0.224014559291889, "grad_norm": 642.086181640625, "learning_rate": 9.576006954713762e-06, "loss": 117.2645, "step": 27080 }, { "epoch": 0.2240972825412582, "grad_norm": 1214.22998046875, "learning_rate": 9.57543839927028e-06, "loss": 128.8348, "step": 27090 }, { "epoch": 0.22418000579062747, "grad_norm": 1050.501953125, "learning_rate": 9.574869479780165e-06, "loss": 147.3516, "step": 27100 }, { "epoch": 0.2242627290399967, "grad_norm": 1185.0849609375, "learning_rate": 9.57430019628868e-06, "loss": 147.9033, "step": 27110 }, { "epoch": 0.22434545228936592, "grad_norm": 1115.831298828125, "learning_rate": 9.573730548841122e-06, "loss": 111.629, "step": 27120 }, { "epoch": 0.22442817553873518, "grad_norm": 1058.506103515625, "learning_rate": 9.573160537482816e-06, "loss": 135.1445, "step": 27130 }, { "epoch": 0.2245108987881044, "grad_norm": 1066.4493408203125, "learning_rate": 9.572590162259112e-06, "loss": 171.7336, "step": 27140 }, { "epoch": 0.22459362203747363, "grad_norm": 735.5531005859375, "learning_rate": 9.572019423215395e-06, "loss": 110.0656, "step": 27150 }, { "epoch": 0.22467634528684285, "grad_norm": 1075.277587890625, "learning_rate": 9.571448320397076e-06, "loss": 102.259, "step": 27160 }, { "epoch": 0.2247590685362121, "grad_norm": 677.2155151367188, "learning_rate": 9.570876853849593e-06, "loss": 128.2644, "step": 27170 }, { "epoch": 0.22484179178558134, "grad_norm": 993.1212768554688, "learning_rate": 9.570305023618417e-06, "loss": 185.3893, "step": 27180 }, { "epoch": 0.22492451503495056, "grad_norm": 963.5702514648438, "learning_rate": 9.569732829749045e-06, "loss": 125.3146, "step": 27190 }, { "epoch": 0.22500723828431982, "grad_norm": 950.5331420898438, "learning_rate": 9.569160272287003e-06, "loss": 126.4869, "step": 27200 }, { "epoch": 0.22508996153368904, "grad_norm": 820.460205078125, "learning_rate": 9.56858735127785e-06, "loss": 76.8966, "step": 27210 }, { "epoch": 0.22517268478305827, "grad_norm": 572.8544311523438, "learning_rate": 9.568014066767166e-06, "loss": 126.8829, "step": 27220 }, { "epoch": 0.22525540803242752, "grad_norm": 998.837646484375, "learning_rate": 9.567440418800569e-06, "loss": 134.7057, "step": 27230 }, { "epoch": 0.22533813128179675, "grad_norm": 956.578857421875, "learning_rate": 9.566866407423698e-06, "loss": 143.3908, "step": 27240 }, { "epoch": 0.22542085453116598, "grad_norm": 1049.07861328125, "learning_rate": 9.566292032682228e-06, "loss": 137.0985, "step": 27250 }, { "epoch": 0.22550357778053523, "grad_norm": 870.1298217773438, "learning_rate": 9.565717294621856e-06, "loss": 114.6946, "step": 27260 }, { "epoch": 0.22558630102990446, "grad_norm": 784.1261596679688, "learning_rate": 9.565142193288313e-06, "loss": 150.3023, "step": 27270 }, { "epoch": 0.22566902427927368, "grad_norm": 721.9214477539062, "learning_rate": 9.564566728727358e-06, "loss": 92.1085, "step": 27280 }, { "epoch": 0.22575174752864294, "grad_norm": 819.4923706054688, "learning_rate": 9.563990900984775e-06, "loss": 103.506, "step": 27290 }, { "epoch": 0.22583447077801216, "grad_norm": 2060.950927734375, "learning_rate": 9.563414710106382e-06, "loss": 212.3363, "step": 27300 }, { "epoch": 0.2259171940273814, "grad_norm": 921.5272216796875, "learning_rate": 9.562838156138025e-06, "loss": 142.4072, "step": 27310 }, { "epoch": 0.22599991727675062, "grad_norm": 935.8705444335938, "learning_rate": 9.562261239125575e-06, "loss": 122.9581, "step": 27320 }, { "epoch": 0.22608264052611987, "grad_norm": 925.080810546875, "learning_rate": 9.561683959114938e-06, "loss": 127.3665, "step": 27330 }, { "epoch": 0.2261653637754891, "grad_norm": 1195.8099365234375, "learning_rate": 9.561106316152043e-06, "loss": 133.1869, "step": 27340 }, { "epoch": 0.22624808702485832, "grad_norm": 784.7255249023438, "learning_rate": 9.56052831028285e-06, "loss": 139.8995, "step": 27350 }, { "epoch": 0.22633081027422758, "grad_norm": 1093.8494873046875, "learning_rate": 9.559949941553351e-06, "loss": 119.8402, "step": 27360 }, { "epoch": 0.2264135335235968, "grad_norm": 973.4978637695312, "learning_rate": 9.559371210009562e-06, "loss": 124.6592, "step": 27370 }, { "epoch": 0.22649625677296603, "grad_norm": 869.2335815429688, "learning_rate": 9.55879211569753e-06, "loss": 94.1615, "step": 27380 }, { "epoch": 0.22657898002233529, "grad_norm": 1302.50244140625, "learning_rate": 9.55821265866333e-06, "loss": 123.9871, "step": 27390 }, { "epoch": 0.2266617032717045, "grad_norm": 1908.8446044921875, "learning_rate": 9.55763283895307e-06, "loss": 140.2897, "step": 27400 }, { "epoch": 0.22674442652107374, "grad_norm": 553.2281494140625, "learning_rate": 9.557052656612882e-06, "loss": 98.0197, "step": 27410 }, { "epoch": 0.226827149770443, "grad_norm": 693.7083740234375, "learning_rate": 9.556472111688928e-06, "loss": 121.6574, "step": 27420 }, { "epoch": 0.22690987301981222, "grad_norm": 1643.4599609375, "learning_rate": 9.555891204227399e-06, "loss": 107.1005, "step": 27430 }, { "epoch": 0.22699259626918145, "grad_norm": 792.270263671875, "learning_rate": 9.555309934274515e-06, "loss": 177.7701, "step": 27440 }, { "epoch": 0.2270753195185507, "grad_norm": 456.06939697265625, "learning_rate": 9.554728301876525e-06, "loss": 137.5959, "step": 27450 }, { "epoch": 0.22715804276791993, "grad_norm": 966.9349365234375, "learning_rate": 9.554146307079711e-06, "loss": 99.2951, "step": 27460 }, { "epoch": 0.22724076601728915, "grad_norm": 1171.660400390625, "learning_rate": 9.553563949930374e-06, "loss": 126.8152, "step": 27470 }, { "epoch": 0.2273234892666584, "grad_norm": 631.3641967773438, "learning_rate": 9.552981230474849e-06, "loss": 101.0075, "step": 27480 }, { "epoch": 0.22740621251602763, "grad_norm": 1808.03466796875, "learning_rate": 9.552398148759506e-06, "loss": 105.2634, "step": 27490 }, { "epoch": 0.22748893576539686, "grad_norm": 1841.3206787109375, "learning_rate": 9.551814704830734e-06, "loss": 124.4455, "step": 27500 }, { "epoch": 0.2275716590147661, "grad_norm": 2790.470947265625, "learning_rate": 9.551230898734955e-06, "loss": 143.8618, "step": 27510 }, { "epoch": 0.22765438226413534, "grad_norm": 916.5533447265625, "learning_rate": 9.550646730518623e-06, "loss": 105.8261, "step": 27520 }, { "epoch": 0.22773710551350457, "grad_norm": 1437.979248046875, "learning_rate": 9.550062200228214e-06, "loss": 101.92, "step": 27530 }, { "epoch": 0.2278198287628738, "grad_norm": 1014.4559326171875, "learning_rate": 9.549477307910238e-06, "loss": 126.4266, "step": 27540 }, { "epoch": 0.22790255201224305, "grad_norm": 1683.48779296875, "learning_rate": 9.548892053611232e-06, "loss": 133.186, "step": 27550 }, { "epoch": 0.22798527526161227, "grad_norm": 864.87744140625, "learning_rate": 9.54830643737776e-06, "loss": 137.8783, "step": 27560 }, { "epoch": 0.2280679985109815, "grad_norm": 1702.0152587890625, "learning_rate": 9.54772045925642e-06, "loss": 125.6688, "step": 27570 }, { "epoch": 0.22815072176035076, "grad_norm": 1634.5037841796875, "learning_rate": 9.547134119293835e-06, "loss": 126.7895, "step": 27580 }, { "epoch": 0.22823344500971998, "grad_norm": 1126.988525390625, "learning_rate": 9.546547417536656e-06, "loss": 117.5014, "step": 27590 }, { "epoch": 0.2283161682590892, "grad_norm": 774.8274536132812, "learning_rate": 9.545960354031564e-06, "loss": 110.8326, "step": 27600 }, { "epoch": 0.22839889150845846, "grad_norm": 920.1649780273438, "learning_rate": 9.545372928825271e-06, "loss": 85.081, "step": 27610 }, { "epoch": 0.2284816147578277, "grad_norm": 550.8583374023438, "learning_rate": 9.544785141964514e-06, "loss": 82.1225, "step": 27620 }, { "epoch": 0.22856433800719692, "grad_norm": 2031.975830078125, "learning_rate": 9.544196993496062e-06, "loss": 113.8713, "step": 27630 }, { "epoch": 0.22864706125656617, "grad_norm": 660.8304443359375, "learning_rate": 9.54360848346671e-06, "loss": 111.5653, "step": 27640 }, { "epoch": 0.2287297845059354, "grad_norm": 1158.63916015625, "learning_rate": 9.543019611923283e-06, "loss": 105.64, "step": 27650 }, { "epoch": 0.22881250775530462, "grad_norm": 1066.19970703125, "learning_rate": 9.542430378912634e-06, "loss": 112.1879, "step": 27660 }, { "epoch": 0.22889523100467388, "grad_norm": 1097.3199462890625, "learning_rate": 9.541840784481648e-06, "loss": 112.1237, "step": 27670 }, { "epoch": 0.2289779542540431, "grad_norm": 1081.3756103515625, "learning_rate": 9.541250828677235e-06, "loss": 144.3419, "step": 27680 }, { "epoch": 0.22906067750341233, "grad_norm": 899.7421264648438, "learning_rate": 9.540660511546335e-06, "loss": 129.8028, "step": 27690 }, { "epoch": 0.22914340075278156, "grad_norm": 835.4717407226562, "learning_rate": 9.540069833135917e-06, "loss": 131.3196, "step": 27700 }, { "epoch": 0.2292261240021508, "grad_norm": 603.4605712890625, "learning_rate": 9.539478793492978e-06, "loss": 94.5269, "step": 27710 }, { "epoch": 0.22930884725152004, "grad_norm": 827.4793701171875, "learning_rate": 9.538887392664544e-06, "loss": 97.6406, "step": 27720 }, { "epoch": 0.22939157050088926, "grad_norm": 947.7173461914062, "learning_rate": 9.53829563069767e-06, "loss": 134.4241, "step": 27730 }, { "epoch": 0.22947429375025852, "grad_norm": 1094.2747802734375, "learning_rate": 9.537703507639444e-06, "loss": 102.8811, "step": 27740 }, { "epoch": 0.22955701699962774, "grad_norm": 1054.0640869140625, "learning_rate": 9.537111023536973e-06, "loss": 110.9093, "step": 27750 }, { "epoch": 0.22963974024899697, "grad_norm": 902.435302734375, "learning_rate": 9.536518178437402e-06, "loss": 120.4104, "step": 27760 }, { "epoch": 0.22972246349836623, "grad_norm": 1348.26025390625, "learning_rate": 9.535924972387898e-06, "loss": 109.3034, "step": 27770 }, { "epoch": 0.22980518674773545, "grad_norm": 857.2482299804688, "learning_rate": 9.535331405435662e-06, "loss": 125.6188, "step": 27780 }, { "epoch": 0.22988790999710468, "grad_norm": 1263.4981689453125, "learning_rate": 9.534737477627918e-06, "loss": 152.1994, "step": 27790 }, { "epoch": 0.22997063324647393, "grad_norm": 1043.9830322265625, "learning_rate": 9.534143189011928e-06, "loss": 139.1974, "step": 27800 }, { "epoch": 0.23005335649584316, "grad_norm": 1181.6470947265625, "learning_rate": 9.533548539634971e-06, "loss": 124.807, "step": 27810 }, { "epoch": 0.23013607974521239, "grad_norm": 1061.00244140625, "learning_rate": 9.532953529544365e-06, "loss": 127.3019, "step": 27820 }, { "epoch": 0.23021880299458164, "grad_norm": 2072.89404296875, "learning_rate": 9.532358158787446e-06, "loss": 112.2069, "step": 27830 }, { "epoch": 0.23030152624395087, "grad_norm": 726.18505859375, "learning_rate": 9.531762427411592e-06, "loss": 126.4197, "step": 27840 }, { "epoch": 0.2303842494933201, "grad_norm": 1991.278564453125, "learning_rate": 9.531166335464198e-06, "loss": 257.9386, "step": 27850 }, { "epoch": 0.23046697274268935, "grad_norm": 648.2035522460938, "learning_rate": 9.530569882992698e-06, "loss": 153.0686, "step": 27860 }, { "epoch": 0.23054969599205857, "grad_norm": 913.1694946289062, "learning_rate": 9.52997307004454e-06, "loss": 122.5074, "step": 27870 }, { "epoch": 0.2306324192414278, "grad_norm": 1065.9097900390625, "learning_rate": 9.529375896667218e-06, "loss": 95.8606, "step": 27880 }, { "epoch": 0.23071514249079703, "grad_norm": 2741.90234375, "learning_rate": 9.528778362908241e-06, "loss": 141.1961, "step": 27890 }, { "epoch": 0.23079786574016628, "grad_norm": 1098.0302734375, "learning_rate": 9.528180468815155e-06, "loss": 103.5171, "step": 27900 }, { "epoch": 0.2308805889895355, "grad_norm": 899.1497192382812, "learning_rate": 9.527582214435531e-06, "loss": 143.412, "step": 27910 }, { "epoch": 0.23096331223890473, "grad_norm": 311.1416320800781, "learning_rate": 9.526983599816968e-06, "loss": 91.3562, "step": 27920 }, { "epoch": 0.231046035488274, "grad_norm": 1649.770263671875, "learning_rate": 9.526384625007096e-06, "loss": 124.463, "step": 27930 }, { "epoch": 0.23112875873764321, "grad_norm": 804.8977661132812, "learning_rate": 9.525785290053573e-06, "loss": 138.7314, "step": 27940 }, { "epoch": 0.23121148198701244, "grad_norm": 859.3971557617188, "learning_rate": 9.525185595004085e-06, "loss": 85.416, "step": 27950 }, { "epoch": 0.2312942052363817, "grad_norm": 901.3175659179688, "learning_rate": 9.524585539906345e-06, "loss": 93.4797, "step": 27960 }, { "epoch": 0.23137692848575092, "grad_norm": 1204.8818359375, "learning_rate": 9.523985124808102e-06, "loss": 140.1107, "step": 27970 }, { "epoch": 0.23145965173512015, "grad_norm": 1186.2174072265625, "learning_rate": 9.523384349757123e-06, "loss": 111.8204, "step": 27980 }, { "epoch": 0.2315423749844894, "grad_norm": 1178.4058837890625, "learning_rate": 9.522783214801213e-06, "loss": 164.7376, "step": 27990 }, { "epoch": 0.23162509823385863, "grad_norm": 864.9526977539062, "learning_rate": 9.522181719988196e-06, "loss": 112.4256, "step": 28000 }, { "epoch": 0.23170782148322785, "grad_norm": 1473.5499267578125, "learning_rate": 9.521579865365935e-06, "loss": 117.2831, "step": 28010 }, { "epoch": 0.2317905447325971, "grad_norm": 525.5335083007812, "learning_rate": 9.520977650982316e-06, "loss": 150.6173, "step": 28020 }, { "epoch": 0.23187326798196634, "grad_norm": 886.9190063476562, "learning_rate": 9.520375076885253e-06, "loss": 146.7385, "step": 28030 }, { "epoch": 0.23195599123133556, "grad_norm": 937.6173095703125, "learning_rate": 9.519772143122691e-06, "loss": 104.6981, "step": 28040 }, { "epoch": 0.2320387144807048, "grad_norm": 834.8984375, "learning_rate": 9.519168849742603e-06, "loss": 96.541, "step": 28050 }, { "epoch": 0.23212143773007404, "grad_norm": 863.10986328125, "learning_rate": 9.51856519679299e-06, "loss": 93.61, "step": 28060 }, { "epoch": 0.23220416097944327, "grad_norm": 683.4520263671875, "learning_rate": 9.517961184321882e-06, "loss": 131.8342, "step": 28070 }, { "epoch": 0.2322868842288125, "grad_norm": 2610.283203125, "learning_rate": 9.517356812377336e-06, "loss": 189.3643, "step": 28080 }, { "epoch": 0.23236960747818175, "grad_norm": 993.5272827148438, "learning_rate": 9.516752081007441e-06, "loss": 133.2566, "step": 28090 }, { "epoch": 0.23245233072755098, "grad_norm": 1212.4417724609375, "learning_rate": 9.51614699026031e-06, "loss": 129.3865, "step": 28100 }, { "epoch": 0.2325350539769202, "grad_norm": 812.443359375, "learning_rate": 9.515541540184093e-06, "loss": 95.0065, "step": 28110 }, { "epoch": 0.23261777722628946, "grad_norm": 1204.5474853515625, "learning_rate": 9.514935730826957e-06, "loss": 145.4519, "step": 28120 }, { "epoch": 0.23270050047565868, "grad_norm": 1555.590576171875, "learning_rate": 9.514329562237107e-06, "loss": 136.7933, "step": 28130 }, { "epoch": 0.2327832237250279, "grad_norm": 870.7056274414062, "learning_rate": 9.51372303446277e-06, "loss": 122.1039, "step": 28140 }, { "epoch": 0.23286594697439716, "grad_norm": 1071.2955322265625, "learning_rate": 9.513116147552207e-06, "loss": 102.6043, "step": 28150 }, { "epoch": 0.2329486702237664, "grad_norm": 797.454833984375, "learning_rate": 9.512508901553703e-06, "loss": 140.6481, "step": 28160 }, { "epoch": 0.23303139347313562, "grad_norm": 585.2443237304688, "learning_rate": 9.511901296515578e-06, "loss": 113.4713, "step": 28170 }, { "epoch": 0.23311411672250487, "grad_norm": 870.2528076171875, "learning_rate": 9.511293332486172e-06, "loss": 130.058, "step": 28180 }, { "epoch": 0.2331968399718741, "grad_norm": 1487.830322265625, "learning_rate": 9.51068500951386e-06, "loss": 132.6228, "step": 28190 }, { "epoch": 0.23327956322124332, "grad_norm": 3511.711181640625, "learning_rate": 9.510076327647043e-06, "loss": 113.2403, "step": 28200 }, { "epoch": 0.23336228647061258, "grad_norm": 1605.590087890625, "learning_rate": 9.509467286934151e-06, "loss": 150.1105, "step": 28210 }, { "epoch": 0.2334450097199818, "grad_norm": 1058.44189453125, "learning_rate": 9.508857887423644e-06, "loss": 122.3643, "step": 28220 }, { "epoch": 0.23352773296935103, "grad_norm": 820.7523803710938, "learning_rate": 9.508248129164006e-06, "loss": 80.4105, "step": 28230 }, { "epoch": 0.23361045621872026, "grad_norm": 912.558837890625, "learning_rate": 9.507638012203755e-06, "loss": 152.501, "step": 28240 }, { "epoch": 0.2336931794680895, "grad_norm": 1337.6898193359375, "learning_rate": 9.507027536591436e-06, "loss": 149.5806, "step": 28250 }, { "epoch": 0.23377590271745874, "grad_norm": 1526.1043701171875, "learning_rate": 9.506416702375618e-06, "loss": 153.4466, "step": 28260 }, { "epoch": 0.23385862596682797, "grad_norm": 933.4179077148438, "learning_rate": 9.505805509604906e-06, "loss": 106.969, "step": 28270 }, { "epoch": 0.23394134921619722, "grad_norm": 1189.154296875, "learning_rate": 9.505193958327927e-06, "loss": 129.8097, "step": 28280 }, { "epoch": 0.23402407246556645, "grad_norm": 691.6627197265625, "learning_rate": 9.504582048593343e-06, "loss": 99.3678, "step": 28290 }, { "epoch": 0.23410679571493567, "grad_norm": 1236.778076171875, "learning_rate": 9.503969780449838e-06, "loss": 119.0243, "step": 28300 }, { "epoch": 0.23418951896430493, "grad_norm": 1243.570068359375, "learning_rate": 9.503357153946126e-06, "loss": 104.1002, "step": 28310 }, { "epoch": 0.23427224221367415, "grad_norm": 1092.5941162109375, "learning_rate": 9.502744169130955e-06, "loss": 97.2079, "step": 28320 }, { "epoch": 0.23435496546304338, "grad_norm": 1051.38671875, "learning_rate": 9.502130826053095e-06, "loss": 132.0031, "step": 28330 }, { "epoch": 0.23443768871241263, "grad_norm": 1343.4345703125, "learning_rate": 9.501517124761347e-06, "loss": 112.9695, "step": 28340 }, { "epoch": 0.23452041196178186, "grad_norm": 637.9159545898438, "learning_rate": 9.50090306530454e-06, "loss": 85.6707, "step": 28350 }, { "epoch": 0.2346031352111511, "grad_norm": 976.9581909179688, "learning_rate": 9.500288647731533e-06, "loss": 127.3839, "step": 28360 }, { "epoch": 0.23468585846052034, "grad_norm": 1000.0506591796875, "learning_rate": 9.49967387209121e-06, "loss": 168.1098, "step": 28370 }, { "epoch": 0.23476858170988957, "grad_norm": 838.3327026367188, "learning_rate": 9.499058738432492e-06, "loss": 112.4135, "step": 28380 }, { "epoch": 0.2348513049592588, "grad_norm": 708.0794067382812, "learning_rate": 9.498443246804314e-06, "loss": 120.7116, "step": 28390 }, { "epoch": 0.23493402820862805, "grad_norm": 1063.5574951171875, "learning_rate": 9.497827397255655e-06, "loss": 99.907, "step": 28400 }, { "epoch": 0.23501675145799727, "grad_norm": 933.8026123046875, "learning_rate": 9.49721118983551e-06, "loss": 153.6268, "step": 28410 }, { "epoch": 0.2350994747073665, "grad_norm": 836.0083618164062, "learning_rate": 9.49659462459291e-06, "loss": 123.5795, "step": 28420 }, { "epoch": 0.23518219795673573, "grad_norm": 1091.86669921875, "learning_rate": 9.495977701576913e-06, "loss": 141.2293, "step": 28430 }, { "epoch": 0.23526492120610498, "grad_norm": 960.7584838867188, "learning_rate": 9.495360420836603e-06, "loss": 113.6143, "step": 28440 }, { "epoch": 0.2353476444554742, "grad_norm": 1572.40771484375, "learning_rate": 9.494742782421099e-06, "loss": 159.3734, "step": 28450 }, { "epoch": 0.23543036770484344, "grad_norm": 1193.2608642578125, "learning_rate": 9.494124786379535e-06, "loss": 128.3347, "step": 28460 }, { "epoch": 0.2355130909542127, "grad_norm": 1346.21630859375, "learning_rate": 9.49350643276109e-06, "loss": 105.2375, "step": 28470 }, { "epoch": 0.23559581420358192, "grad_norm": 1731.52001953125, "learning_rate": 9.49288772161496e-06, "loss": 134.3214, "step": 28480 }, { "epoch": 0.23567853745295114, "grad_norm": 982.36279296875, "learning_rate": 9.492268652990374e-06, "loss": 120.5295, "step": 28490 }, { "epoch": 0.2357612607023204, "grad_norm": 1203.948486328125, "learning_rate": 9.491649226936586e-06, "loss": 149.1518, "step": 28500 }, { "epoch": 0.23584398395168962, "grad_norm": 1445.0263671875, "learning_rate": 9.491029443502884e-06, "loss": 133.0336, "step": 28510 }, { "epoch": 0.23592670720105885, "grad_norm": 1120.74462890625, "learning_rate": 9.490409302738582e-06, "loss": 104.1838, "step": 28520 }, { "epoch": 0.2360094304504281, "grad_norm": 1055.50830078125, "learning_rate": 9.489788804693017e-06, "loss": 97.1542, "step": 28530 }, { "epoch": 0.23609215369979733, "grad_norm": 645.5867309570312, "learning_rate": 9.489167949415563e-06, "loss": 124.6525, "step": 28540 }, { "epoch": 0.23617487694916656, "grad_norm": 723.2225952148438, "learning_rate": 9.48854673695562e-06, "loss": 117.6365, "step": 28550 }, { "epoch": 0.2362576001985358, "grad_norm": 1657.0540771484375, "learning_rate": 9.48792516736261e-06, "loss": 131.2895, "step": 28560 }, { "epoch": 0.23634032344790504, "grad_norm": 967.8582763671875, "learning_rate": 9.487303240685992e-06, "loss": 100.9019, "step": 28570 }, { "epoch": 0.23642304669727426, "grad_norm": 2011.5921630859375, "learning_rate": 9.48668095697525e-06, "loss": 167.2856, "step": 28580 }, { "epoch": 0.2365057699466435, "grad_norm": 991.98291015625, "learning_rate": 9.486058316279894e-06, "loss": 158.8021, "step": 28590 }, { "epoch": 0.23658849319601274, "grad_norm": 905.6279296875, "learning_rate": 9.485435318649468e-06, "loss": 124.5288, "step": 28600 }, { "epoch": 0.23667121644538197, "grad_norm": 793.5623779296875, "learning_rate": 9.484811964133537e-06, "loss": 138.7439, "step": 28610 }, { "epoch": 0.2367539396947512, "grad_norm": 1176.435791015625, "learning_rate": 9.484188252781701e-06, "loss": 109.5845, "step": 28620 }, { "epoch": 0.23683666294412045, "grad_norm": 840.8705444335938, "learning_rate": 9.483564184643586e-06, "loss": 90.2001, "step": 28630 }, { "epoch": 0.23691938619348968, "grad_norm": 705.6690673828125, "learning_rate": 9.482939759768845e-06, "loss": 145.6554, "step": 28640 }, { "epoch": 0.2370021094428589, "grad_norm": 1252.253662109375, "learning_rate": 9.48231497820716e-06, "loss": 129.5605, "step": 28650 }, { "epoch": 0.23708483269222816, "grad_norm": 1174.8831787109375, "learning_rate": 9.481689840008246e-06, "loss": 121.5843, "step": 28660 }, { "epoch": 0.23716755594159739, "grad_norm": 1164.958740234375, "learning_rate": 9.481064345221838e-06, "loss": 130.8124, "step": 28670 }, { "epoch": 0.2372502791909666, "grad_norm": 1460.279052734375, "learning_rate": 9.480438493897707e-06, "loss": 186.2501, "step": 28680 }, { "epoch": 0.23733300244033587, "grad_norm": 1548.7734375, "learning_rate": 9.479812286085645e-06, "loss": 122.9342, "step": 28690 }, { "epoch": 0.2374157256897051, "grad_norm": 485.3841857910156, "learning_rate": 9.47918572183548e-06, "loss": 116.2105, "step": 28700 }, { "epoch": 0.23749844893907432, "grad_norm": 2227.711181640625, "learning_rate": 9.478558801197065e-06, "loss": 108.795, "step": 28710 }, { "epoch": 0.23758117218844357, "grad_norm": 1480.4208984375, "learning_rate": 9.47793152422028e-06, "loss": 103.7922, "step": 28720 }, { "epoch": 0.2376638954378128, "grad_norm": 845.8829956054688, "learning_rate": 9.477303890955032e-06, "loss": 112.599, "step": 28730 }, { "epoch": 0.23774661868718203, "grad_norm": 593.84619140625, "learning_rate": 9.476675901451264e-06, "loss": 124.1586, "step": 28740 }, { "epoch": 0.23782934193655128, "grad_norm": 636.6671752929688, "learning_rate": 9.476047555758938e-06, "loss": 172.5131, "step": 28750 }, { "epoch": 0.2379120651859205, "grad_norm": 4360.0341796875, "learning_rate": 9.475418853928051e-06, "loss": 191.1747, "step": 28760 }, { "epoch": 0.23799478843528973, "grad_norm": 483.2803649902344, "learning_rate": 9.474789796008625e-06, "loss": 138.2722, "step": 28770 }, { "epoch": 0.23807751168465896, "grad_norm": 1149.00048828125, "learning_rate": 9.474160382050711e-06, "loss": 126.3032, "step": 28780 }, { "epoch": 0.23816023493402821, "grad_norm": 1440.6688232421875, "learning_rate": 9.47353061210439e-06, "loss": 96.6904, "step": 28790 }, { "epoch": 0.23824295818339744, "grad_norm": 2017.08837890625, "learning_rate": 9.47290048621977e-06, "loss": 116.2174, "step": 28800 }, { "epoch": 0.23832568143276667, "grad_norm": 900.5809326171875, "learning_rate": 9.472270004446984e-06, "loss": 110.8572, "step": 28810 }, { "epoch": 0.23840840468213592, "grad_norm": 853.006591796875, "learning_rate": 9.4716391668362e-06, "loss": 115.7237, "step": 28820 }, { "epoch": 0.23849112793150515, "grad_norm": 1115.9259033203125, "learning_rate": 9.471007973437607e-06, "loss": 108.8435, "step": 28830 }, { "epoch": 0.23857385118087437, "grad_norm": 1762.0460205078125, "learning_rate": 9.470376424301432e-06, "loss": 148.8191, "step": 28840 }, { "epoch": 0.23865657443024363, "grad_norm": 1468.8228759765625, "learning_rate": 9.46974451947792e-06, "loss": 154.2129, "step": 28850 }, { "epoch": 0.23873929767961286, "grad_norm": 1209.044921875, "learning_rate": 9.469112259017349e-06, "loss": 107.4766, "step": 28860 }, { "epoch": 0.23882202092898208, "grad_norm": 1850.443603515625, "learning_rate": 9.468479642970027e-06, "loss": 117.6253, "step": 28870 }, { "epoch": 0.23890474417835134, "grad_norm": 1324.356689453125, "learning_rate": 9.467846671386287e-06, "loss": 178.3749, "step": 28880 }, { "epoch": 0.23898746742772056, "grad_norm": 1060.7711181640625, "learning_rate": 9.467213344316493e-06, "loss": 101.0151, "step": 28890 }, { "epoch": 0.2390701906770898, "grad_norm": 2596.78076171875, "learning_rate": 9.466579661811032e-06, "loss": 149.5662, "step": 28900 }, { "epoch": 0.23915291392645904, "grad_norm": 1160.0654296875, "learning_rate": 9.46594562392033e-06, "loss": 114.8278, "step": 28910 }, { "epoch": 0.23923563717582827, "grad_norm": 1474.780029296875, "learning_rate": 9.465311230694828e-06, "loss": 94.9893, "step": 28920 }, { "epoch": 0.2393183604251975, "grad_norm": 827.9722900390625, "learning_rate": 9.464676482185005e-06, "loss": 82.4494, "step": 28930 }, { "epoch": 0.23940108367456675, "grad_norm": 490.88360595703125, "learning_rate": 9.464041378441365e-06, "loss": 160.122, "step": 28940 }, { "epoch": 0.23948380692393598, "grad_norm": 2539.133544921875, "learning_rate": 9.46340591951444e-06, "loss": 135.2263, "step": 28950 }, { "epoch": 0.2395665301733052, "grad_norm": 620.4109497070312, "learning_rate": 9.462770105454789e-06, "loss": 130.8018, "step": 28960 }, { "epoch": 0.23964925342267443, "grad_norm": 575.00146484375, "learning_rate": 9.462133936313002e-06, "loss": 123.9707, "step": 28970 }, { "epoch": 0.23973197667204368, "grad_norm": 764.5715942382812, "learning_rate": 9.461497412139697e-06, "loss": 103.3378, "step": 28980 }, { "epoch": 0.2398146999214129, "grad_norm": 1179.3966064453125, "learning_rate": 9.46086053298552e-06, "loss": 139.9406, "step": 28990 }, { "epoch": 0.23989742317078214, "grad_norm": 1064.7276611328125, "learning_rate": 9.460223298901138e-06, "loss": 100.375, "step": 29000 }, { "epoch": 0.2399801464201514, "grad_norm": 833.2089233398438, "learning_rate": 9.459585709937262e-06, "loss": 120.6056, "step": 29010 }, { "epoch": 0.24006286966952062, "grad_norm": 1361.6549072265625, "learning_rate": 9.458947766144617e-06, "loss": 129.4685, "step": 29020 }, { "epoch": 0.24014559291888984, "grad_norm": 1131.559326171875, "learning_rate": 9.458309467573963e-06, "loss": 90.7656, "step": 29030 }, { "epoch": 0.2402283161682591, "grad_norm": 1216.7392578125, "learning_rate": 9.457670814276083e-06, "loss": 105.4316, "step": 29040 }, { "epoch": 0.24031103941762832, "grad_norm": 853.8714599609375, "learning_rate": 9.457031806301795e-06, "loss": 94.2898, "step": 29050 }, { "epoch": 0.24039376266699755, "grad_norm": 826.33837890625, "learning_rate": 9.456392443701943e-06, "loss": 118.5048, "step": 29060 }, { "epoch": 0.2404764859163668, "grad_norm": 969.9588012695312, "learning_rate": 9.455752726527395e-06, "loss": 158.1088, "step": 29070 }, { "epoch": 0.24055920916573603, "grad_norm": 591.8674926757812, "learning_rate": 9.45511265482905e-06, "loss": 106.0139, "step": 29080 }, { "epoch": 0.24064193241510526, "grad_norm": 1056.0914306640625, "learning_rate": 9.454472228657841e-06, "loss": 148.1635, "step": 29090 }, { "epoch": 0.2407246556644745, "grad_norm": 1000.041259765625, "learning_rate": 9.453831448064717e-06, "loss": 119.1304, "step": 29100 }, { "epoch": 0.24080737891384374, "grad_norm": 765.8133544921875, "learning_rate": 9.453190313100666e-06, "loss": 83.0749, "step": 29110 }, { "epoch": 0.24089010216321297, "grad_norm": 963.5242919921875, "learning_rate": 9.4525488238167e-06, "loss": 153.7064, "step": 29120 }, { "epoch": 0.24097282541258222, "grad_norm": 809.49267578125, "learning_rate": 9.451906980263857e-06, "loss": 122.1319, "step": 29130 }, { "epoch": 0.24105554866195145, "grad_norm": 800.9232788085938, "learning_rate": 9.451264782493208e-06, "loss": 101.5012, "step": 29140 }, { "epoch": 0.24113827191132067, "grad_norm": 1233.4398193359375, "learning_rate": 9.450622230555849e-06, "loss": 144.6246, "step": 29150 }, { "epoch": 0.2412209951606899, "grad_norm": 692.1824951171875, "learning_rate": 9.449979324502905e-06, "loss": 160.0062, "step": 29160 }, { "epoch": 0.24130371841005915, "grad_norm": 946.5017700195312, "learning_rate": 9.449336064385529e-06, "loss": 105.0953, "step": 29170 }, { "epoch": 0.24138644165942838, "grad_norm": 1230.076416015625, "learning_rate": 9.4486924502549e-06, "loss": 120.0082, "step": 29180 }, { "epoch": 0.2414691649087976, "grad_norm": 839.8562622070312, "learning_rate": 9.448048482162231e-06, "loss": 137.8697, "step": 29190 }, { "epoch": 0.24155188815816686, "grad_norm": 882.9497680664062, "learning_rate": 9.447404160158758e-06, "loss": 119.5869, "step": 29200 }, { "epoch": 0.2416346114075361, "grad_norm": 1333.1221923828125, "learning_rate": 9.446759484295745e-06, "loss": 116.6337, "step": 29210 }, { "epoch": 0.2417173346569053, "grad_norm": 955.5294799804688, "learning_rate": 9.44611445462449e-06, "loss": 134.2271, "step": 29220 }, { "epoch": 0.24180005790627457, "grad_norm": 1251.0631103515625, "learning_rate": 9.445469071196312e-06, "loss": 124.4641, "step": 29230 }, { "epoch": 0.2418827811556438, "grad_norm": 834.0491943359375, "learning_rate": 9.444823334062562e-06, "loss": 116.2968, "step": 29240 }, { "epoch": 0.24196550440501302, "grad_norm": 1349.356201171875, "learning_rate": 9.444177243274619e-06, "loss": 131.4607, "step": 29250 }, { "epoch": 0.24204822765438228, "grad_norm": 1118.03173828125, "learning_rate": 9.443530798883887e-06, "loss": 107.2266, "step": 29260 }, { "epoch": 0.2421309509037515, "grad_norm": 1354.8619384765625, "learning_rate": 9.442884000941803e-06, "loss": 129.6626, "step": 29270 }, { "epoch": 0.24221367415312073, "grad_norm": 1301.5723876953125, "learning_rate": 9.44223684949983e-06, "loss": 126.9563, "step": 29280 }, { "epoch": 0.24229639740248998, "grad_norm": 639.9192504882812, "learning_rate": 9.441589344609457e-06, "loss": 97.4439, "step": 29290 }, { "epoch": 0.2423791206518592, "grad_norm": 1386.3795166015625, "learning_rate": 9.440941486322205e-06, "loss": 150.2773, "step": 29300 }, { "epoch": 0.24246184390122844, "grad_norm": 1358.3714599609375, "learning_rate": 9.44029327468962e-06, "loss": 110.78, "step": 29310 }, { "epoch": 0.24254456715059766, "grad_norm": 757.8045043945312, "learning_rate": 9.439644709763276e-06, "loss": 112.67, "step": 29320 }, { "epoch": 0.24262729039996692, "grad_norm": 930.5925903320312, "learning_rate": 9.43899579159478e-06, "loss": 121.9743, "step": 29330 }, { "epoch": 0.24271001364933614, "grad_norm": 1049.4073486328125, "learning_rate": 9.438346520235759e-06, "loss": 100.5406, "step": 29340 }, { "epoch": 0.24279273689870537, "grad_norm": 1115.40966796875, "learning_rate": 9.437696895737876e-06, "loss": 121.6903, "step": 29350 }, { "epoch": 0.24287546014807462, "grad_norm": 698.0397338867188, "learning_rate": 9.437046918152817e-06, "loss": 88.6896, "step": 29360 }, { "epoch": 0.24295818339744385, "grad_norm": 824.7769165039062, "learning_rate": 9.436396587532297e-06, "loss": 126.8226, "step": 29370 }, { "epoch": 0.24304090664681308, "grad_norm": 1229.65869140625, "learning_rate": 9.435745903928062e-06, "loss": 113.3302, "step": 29380 }, { "epoch": 0.24312362989618233, "grad_norm": 845.088623046875, "learning_rate": 9.435094867391881e-06, "loss": 154.009, "step": 29390 }, { "epoch": 0.24320635314555156, "grad_norm": 961.1011962890625, "learning_rate": 9.434443477975557e-06, "loss": 103.9956, "step": 29400 }, { "epoch": 0.24328907639492078, "grad_norm": 757.7135009765625, "learning_rate": 9.433791735730917e-06, "loss": 98.1805, "step": 29410 }, { "epoch": 0.24337179964429004, "grad_norm": 1421.3348388671875, "learning_rate": 9.433139640709817e-06, "loss": 132.9433, "step": 29420 }, { "epoch": 0.24345452289365926, "grad_norm": 608.5304565429688, "learning_rate": 9.432487192964142e-06, "loss": 122.0067, "step": 29430 }, { "epoch": 0.2435372461430285, "grad_norm": 1166.598876953125, "learning_rate": 9.431834392545803e-06, "loss": 127.8436, "step": 29440 }, { "epoch": 0.24361996939239774, "grad_norm": 1254.7440185546875, "learning_rate": 9.43118123950674e-06, "loss": 124.5958, "step": 29450 }, { "epoch": 0.24370269264176697, "grad_norm": 924.8063354492188, "learning_rate": 9.430527733898922e-06, "loss": 102.3073, "step": 29460 }, { "epoch": 0.2437854158911362, "grad_norm": 957.7825927734375, "learning_rate": 9.429873875774344e-06, "loss": 112.2574, "step": 29470 }, { "epoch": 0.24386813914050545, "grad_norm": 867.5020141601562, "learning_rate": 9.429219665185034e-06, "loss": 109.0799, "step": 29480 }, { "epoch": 0.24395086238987468, "grad_norm": 1606.21435546875, "learning_rate": 9.428565102183043e-06, "loss": 114.3639, "step": 29490 }, { "epoch": 0.2440335856392439, "grad_norm": 782.0588989257812, "learning_rate": 9.42791018682045e-06, "loss": 102.8368, "step": 29500 }, { "epoch": 0.24411630888861313, "grad_norm": 1499.0159912109375, "learning_rate": 9.427254919149367e-06, "loss": 129.8493, "step": 29510 }, { "epoch": 0.24419903213798239, "grad_norm": 1734.2025146484375, "learning_rate": 9.426599299221925e-06, "loss": 118.0028, "step": 29520 }, { "epoch": 0.2442817553873516, "grad_norm": 1104.4652099609375, "learning_rate": 9.425943327090295e-06, "loss": 133.7769, "step": 29530 }, { "epoch": 0.24436447863672084, "grad_norm": 809.5218505859375, "learning_rate": 9.425287002806666e-06, "loss": 101.5154, "step": 29540 }, { "epoch": 0.2444472018860901, "grad_norm": 860.76123046875, "learning_rate": 9.42463032642326e-06, "loss": 126.5965, "step": 29550 }, { "epoch": 0.24452992513545932, "grad_norm": 1226.3048095703125, "learning_rate": 9.423973297992324e-06, "loss": 133.2678, "step": 29560 }, { "epoch": 0.24461264838482855, "grad_norm": 638.6930541992188, "learning_rate": 9.423315917566137e-06, "loss": 153.0996, "step": 29570 }, { "epoch": 0.2446953716341978, "grad_norm": 598.7249755859375, "learning_rate": 9.422658185197002e-06, "loss": 122.7943, "step": 29580 }, { "epoch": 0.24477809488356703, "grad_norm": 711.6947021484375, "learning_rate": 9.422000100937253e-06, "loss": 93.9475, "step": 29590 }, { "epoch": 0.24486081813293625, "grad_norm": 1122.1689453125, "learning_rate": 9.42134166483925e-06, "loss": 97.7506, "step": 29600 }, { "epoch": 0.2449435413823055, "grad_norm": 1915.1302490234375, "learning_rate": 9.420682876955382e-06, "loss": 115.1031, "step": 29610 }, { "epoch": 0.24502626463167473, "grad_norm": 659.7858276367188, "learning_rate": 9.420023737338065e-06, "loss": 120.2869, "step": 29620 }, { "epoch": 0.24510898788104396, "grad_norm": 834.023193359375, "learning_rate": 9.419364246039745e-06, "loss": 125.4224, "step": 29630 }, { "epoch": 0.24519171113041321, "grad_norm": 1009.0372924804688, "learning_rate": 9.418704403112894e-06, "loss": 109.2442, "step": 29640 }, { "epoch": 0.24527443437978244, "grad_norm": 2674.98193359375, "learning_rate": 9.418044208610013e-06, "loss": 156.5225, "step": 29650 }, { "epoch": 0.24535715762915167, "grad_norm": 1011.0217895507812, "learning_rate": 9.41738366258363e-06, "loss": 126.1811, "step": 29660 }, { "epoch": 0.24543988087852092, "grad_norm": 610.7017211914062, "learning_rate": 9.416722765086304e-06, "loss": 144.7449, "step": 29670 }, { "epoch": 0.24552260412789015, "grad_norm": 1031.1539306640625, "learning_rate": 9.416061516170615e-06, "loss": 108.1692, "step": 29680 }, { "epoch": 0.24560532737725937, "grad_norm": 1801.7032470703125, "learning_rate": 9.415399915889179e-06, "loss": 121.3443, "step": 29690 }, { "epoch": 0.2456880506266286, "grad_norm": 1428.9144287109375, "learning_rate": 9.414737964294636e-06, "loss": 116.9526, "step": 29700 }, { "epoch": 0.24577077387599786, "grad_norm": 937.070556640625, "learning_rate": 9.414075661439653e-06, "loss": 111.9231, "step": 29710 }, { "epoch": 0.24585349712536708, "grad_norm": 1381.5968017578125, "learning_rate": 9.413413007376928e-06, "loss": 163.5947, "step": 29720 }, { "epoch": 0.2459362203747363, "grad_norm": 1236.41552734375, "learning_rate": 9.412750002159186e-06, "loss": 110.7294, "step": 29730 }, { "epoch": 0.24601894362410556, "grad_norm": 757.4229125976562, "learning_rate": 9.412086645839177e-06, "loss": 88.9742, "step": 29740 }, { "epoch": 0.2461016668734748, "grad_norm": 1002.5419311523438, "learning_rate": 9.411422938469683e-06, "loss": 137.723, "step": 29750 }, { "epoch": 0.24618439012284402, "grad_norm": 636.2584228515625, "learning_rate": 9.41075888010351e-06, "loss": 108.3714, "step": 29760 }, { "epoch": 0.24626711337221327, "grad_norm": 737.2359008789062, "learning_rate": 9.410094470793497e-06, "loss": 135.6444, "step": 29770 }, { "epoch": 0.2463498366215825, "grad_norm": 1887.1973876953125, "learning_rate": 9.409429710592505e-06, "loss": 126.0426, "step": 29780 }, { "epoch": 0.24643255987095172, "grad_norm": 1311.354248046875, "learning_rate": 9.408764599553429e-06, "loss": 156.2838, "step": 29790 }, { "epoch": 0.24651528312032098, "grad_norm": 1149.238525390625, "learning_rate": 9.408099137729188e-06, "loss": 130.7976, "step": 29800 }, { "epoch": 0.2465980063696902, "grad_norm": 1126.7828369140625, "learning_rate": 9.407433325172727e-06, "loss": 153.4184, "step": 29810 }, { "epoch": 0.24668072961905943, "grad_norm": 1250.0152587890625, "learning_rate": 9.406767161937025e-06, "loss": 142.3581, "step": 29820 }, { "epoch": 0.24676345286842868, "grad_norm": 1395.9898681640625, "learning_rate": 9.406100648075084e-06, "loss": 122.4098, "step": 29830 }, { "epoch": 0.2468461761177979, "grad_norm": 1577.0543212890625, "learning_rate": 9.405433783639936e-06, "loss": 112.6034, "step": 29840 }, { "epoch": 0.24692889936716714, "grad_norm": 1184.2935791015625, "learning_rate": 9.40476656868464e-06, "loss": 148.0747, "step": 29850 }, { "epoch": 0.24701162261653636, "grad_norm": 805.5813598632812, "learning_rate": 9.404099003262282e-06, "loss": 155.5525, "step": 29860 }, { "epoch": 0.24709434586590562, "grad_norm": 1295.2099609375, "learning_rate": 9.40343108742598e-06, "loss": 149.4768, "step": 29870 }, { "epoch": 0.24717706911527484, "grad_norm": 995.8720092773438, "learning_rate": 9.402762821228875e-06, "loss": 140.2816, "step": 29880 }, { "epoch": 0.24725979236464407, "grad_norm": 866.3977661132812, "learning_rate": 9.402094204724138e-06, "loss": 129.4959, "step": 29890 }, { "epoch": 0.24734251561401333, "grad_norm": 1852.4481201171875, "learning_rate": 9.401425237964966e-06, "loss": 102.9619, "step": 29900 }, { "epoch": 0.24742523886338255, "grad_norm": 743.556640625, "learning_rate": 9.400755921004592e-06, "loss": 85.2109, "step": 29910 }, { "epoch": 0.24750796211275178, "grad_norm": 809.3905639648438, "learning_rate": 9.400086253896264e-06, "loss": 106.4736, "step": 29920 }, { "epoch": 0.24759068536212103, "grad_norm": 783.1993408203125, "learning_rate": 9.399416236693264e-06, "loss": 125.8943, "step": 29930 }, { "epoch": 0.24767340861149026, "grad_norm": 772.568115234375, "learning_rate": 9.398745869448909e-06, "loss": 123.8559, "step": 29940 }, { "epoch": 0.24775613186085949, "grad_norm": 1002.6859130859375, "learning_rate": 9.39807515221653e-06, "loss": 101.8871, "step": 29950 }, { "epoch": 0.24783885511022874, "grad_norm": 1258.36572265625, "learning_rate": 9.397404085049496e-06, "loss": 98.4138, "step": 29960 }, { "epoch": 0.24792157835959797, "grad_norm": 1145.0703125, "learning_rate": 9.3967326680012e-06, "loss": 118.3927, "step": 29970 }, { "epoch": 0.2480043016089672, "grad_norm": 1031.3804931640625, "learning_rate": 9.396060901125064e-06, "loss": 105.3649, "step": 29980 }, { "epoch": 0.24808702485833645, "grad_norm": 2366.289794921875, "learning_rate": 9.395388784474538e-06, "loss": 168.2479, "step": 29990 }, { "epoch": 0.24816974810770567, "grad_norm": 369.7084045410156, "learning_rate": 9.394716318103098e-06, "loss": 121.3149, "step": 30000 }, { "epoch": 0.2482524713570749, "grad_norm": 1409.655029296875, "learning_rate": 9.394043502064249e-06, "loss": 105.2097, "step": 30010 }, { "epoch": 0.24833519460644415, "grad_norm": 1403.9825439453125, "learning_rate": 9.393370336411527e-06, "loss": 147.3934, "step": 30020 }, { "epoch": 0.24841791785581338, "grad_norm": 929.0065307617188, "learning_rate": 9.392696821198488e-06, "loss": 124.7842, "step": 30030 }, { "epoch": 0.2485006411051826, "grad_norm": 828.625244140625, "learning_rate": 9.392022956478724e-06, "loss": 112.9368, "step": 30040 }, { "epoch": 0.24858336435455183, "grad_norm": 3000.644287109375, "learning_rate": 9.391348742305849e-06, "loss": 148.1125, "step": 30050 }, { "epoch": 0.2486660876039211, "grad_norm": 908.979248046875, "learning_rate": 9.390674178733508e-06, "loss": 109.6535, "step": 30060 }, { "epoch": 0.24874881085329031, "grad_norm": 1394.7421875, "learning_rate": 9.389999265815373e-06, "loss": 112.9092, "step": 30070 }, { "epoch": 0.24883153410265954, "grad_norm": 1016.4574584960938, "learning_rate": 9.389324003605144e-06, "loss": 168.127, "step": 30080 }, { "epoch": 0.2489142573520288, "grad_norm": 1174.79443359375, "learning_rate": 9.388648392156547e-06, "loss": 112.0588, "step": 30090 }, { "epoch": 0.24899698060139802, "grad_norm": 2049.481689453125, "learning_rate": 9.387972431523341e-06, "loss": 127.4066, "step": 30100 }, { "epoch": 0.24907970385076725, "grad_norm": 712.5939331054688, "learning_rate": 9.387296121759305e-06, "loss": 98.8517, "step": 30110 }, { "epoch": 0.2491624271001365, "grad_norm": 872.814208984375, "learning_rate": 9.386619462918254e-06, "loss": 100.3602, "step": 30120 }, { "epoch": 0.24924515034950573, "grad_norm": 649.6997680664062, "learning_rate": 9.385942455054022e-06, "loss": 119.4873, "step": 30130 }, { "epoch": 0.24932787359887496, "grad_norm": 655.9243774414062, "learning_rate": 9.385265098220478e-06, "loss": 124.5341, "step": 30140 }, { "epoch": 0.2494105968482442, "grad_norm": 822.2200927734375, "learning_rate": 9.384587392471516e-06, "loss": 162.9077, "step": 30150 }, { "epoch": 0.24949332009761344, "grad_norm": 940.898193359375, "learning_rate": 9.383909337861058e-06, "loss": 118.583, "step": 30160 }, { "epoch": 0.24957604334698266, "grad_norm": 1128.41943359375, "learning_rate": 9.383230934443053e-06, "loss": 136.6669, "step": 30170 }, { "epoch": 0.24965876659635192, "grad_norm": 631.8690795898438, "learning_rate": 9.382552182271478e-06, "loss": 97.5566, "step": 30180 }, { "epoch": 0.24974148984572114, "grad_norm": 1021.1989135742188, "learning_rate": 9.38187308140034e-06, "loss": 146.495, "step": 30190 }, { "epoch": 0.24982421309509037, "grad_norm": 1181.3828125, "learning_rate": 9.381193631883672e-06, "loss": 150.6252, "step": 30200 }, { "epoch": 0.24990693634445962, "grad_norm": 814.0835571289062, "learning_rate": 9.380513833775531e-06, "loss": 114.7124, "step": 30210 }, { "epoch": 0.24998965959382885, "grad_norm": 1297.4193115234375, "learning_rate": 9.37983368713001e-06, "loss": 97.1973, "step": 30220 }, { "epoch": 0.2500723828431981, "grad_norm": 800.78564453125, "learning_rate": 9.379153192001223e-06, "loss": 98.411, "step": 30230 }, { "epoch": 0.2501551060925673, "grad_norm": 1123.505859375, "learning_rate": 9.378472348443315e-06, "loss": 119.3296, "step": 30240 }, { "epoch": 0.25023782934193656, "grad_norm": 888.609375, "learning_rate": 9.377791156510456e-06, "loss": 74.0182, "step": 30250 }, { "epoch": 0.2503205525913058, "grad_norm": 713.3021240234375, "learning_rate": 9.377109616256846e-06, "loss": 147.7178, "step": 30260 }, { "epoch": 0.250403275840675, "grad_norm": 1878.27880859375, "learning_rate": 9.37642772773671e-06, "loss": 154.91, "step": 30270 }, { "epoch": 0.25048599909004426, "grad_norm": 625.2662353515625, "learning_rate": 9.375745491004307e-06, "loss": 90.2972, "step": 30280 }, { "epoch": 0.2505687223394135, "grad_norm": 621.6907958984375, "learning_rate": 9.375062906113916e-06, "loss": 126.9956, "step": 30290 }, { "epoch": 0.2506514455887827, "grad_norm": 1250.3077392578125, "learning_rate": 9.37437997311985e-06, "loss": 107.0872, "step": 30300 }, { "epoch": 0.25073416883815197, "grad_norm": 720.9534912109375, "learning_rate": 9.373696692076446e-06, "loss": 105.0815, "step": 30310 }, { "epoch": 0.25081689208752117, "grad_norm": 684.220703125, "learning_rate": 9.373013063038066e-06, "loss": 129.8487, "step": 30320 }, { "epoch": 0.2508996153368904, "grad_norm": 1063.6759033203125, "learning_rate": 9.372329086059108e-06, "loss": 135.9542, "step": 30330 }, { "epoch": 0.2509823385862597, "grad_norm": 1607.3919677734375, "learning_rate": 9.37164476119399e-06, "loss": 142.6617, "step": 30340 }, { "epoch": 0.2510650618356289, "grad_norm": 542.721435546875, "learning_rate": 9.370960088497162e-06, "loss": 106.839, "step": 30350 }, { "epoch": 0.25114778508499813, "grad_norm": 549.24560546875, "learning_rate": 9.370275068023097e-06, "loss": 129.0447, "step": 30360 }, { "epoch": 0.2512305083343674, "grad_norm": 960.9791259765625, "learning_rate": 9.369589699826306e-06, "loss": 140.4398, "step": 30370 }, { "epoch": 0.2513132315837366, "grad_norm": 1057.4302978515625, "learning_rate": 9.368903983961315e-06, "loss": 138.126, "step": 30380 }, { "epoch": 0.25139595483310584, "grad_norm": 1365.5726318359375, "learning_rate": 9.368217920482684e-06, "loss": 139.174, "step": 30390 }, { "epoch": 0.2514786780824751, "grad_norm": 887.7735595703125, "learning_rate": 9.367531509445001e-06, "loss": 129.102, "step": 30400 }, { "epoch": 0.2515614013318443, "grad_norm": 1251.561767578125, "learning_rate": 9.366844750902878e-06, "loss": 121.9665, "step": 30410 }, { "epoch": 0.25164412458121355, "grad_norm": 881.740234375, "learning_rate": 9.36615764491096e-06, "loss": 82.5869, "step": 30420 }, { "epoch": 0.2517268478305828, "grad_norm": 821.780029296875, "learning_rate": 9.365470191523917e-06, "loss": 146.2663, "step": 30430 }, { "epoch": 0.251809571079952, "grad_norm": 626.3407592773438, "learning_rate": 9.364782390796446e-06, "loss": 86.4238, "step": 30440 }, { "epoch": 0.25189229432932125, "grad_norm": 1124.6002197265625, "learning_rate": 9.364094242783272e-06, "loss": 146.8187, "step": 30450 }, { "epoch": 0.2519750175786905, "grad_norm": 631.0712890625, "learning_rate": 9.363405747539147e-06, "loss": 98.5037, "step": 30460 }, { "epoch": 0.2520577408280597, "grad_norm": 949.3443603515625, "learning_rate": 9.362716905118851e-06, "loss": 139.6968, "step": 30470 }, { "epoch": 0.25214046407742896, "grad_norm": 513.1497802734375, "learning_rate": 9.362027715577195e-06, "loss": 118.3806, "step": 30480 }, { "epoch": 0.2522231873267982, "grad_norm": 1057.8067626953125, "learning_rate": 9.361338178969012e-06, "loss": 108.9348, "step": 30490 }, { "epoch": 0.2523059105761674, "grad_norm": 903.6969604492188, "learning_rate": 9.360648295349165e-06, "loss": 105.4085, "step": 30500 }, { "epoch": 0.25238863382553667, "grad_norm": 2535.44189453125, "learning_rate": 9.359958064772547e-06, "loss": 161.6714, "step": 30510 }, { "epoch": 0.2524713570749059, "grad_norm": 1677.7100830078125, "learning_rate": 9.359267487294075e-06, "loss": 128.2102, "step": 30520 }, { "epoch": 0.2525540803242751, "grad_norm": 1912.9716796875, "learning_rate": 9.358576562968695e-06, "loss": 118.4899, "step": 30530 }, { "epoch": 0.2526368035736444, "grad_norm": 763.476318359375, "learning_rate": 9.357885291851382e-06, "loss": 124.9722, "step": 30540 }, { "epoch": 0.25271952682301363, "grad_norm": 1949.4473876953125, "learning_rate": 9.357193673997133e-06, "loss": 104.1943, "step": 30550 }, { "epoch": 0.25280225007238283, "grad_norm": 1901.4537353515625, "learning_rate": 9.356501709460984e-06, "loss": 108.5047, "step": 30560 }, { "epoch": 0.2528849733217521, "grad_norm": 773.7173461914062, "learning_rate": 9.355809398297986e-06, "loss": 95.4959, "step": 30570 }, { "epoch": 0.25296769657112134, "grad_norm": 1136.826171875, "learning_rate": 9.355116740563225e-06, "loss": 136.653, "step": 30580 }, { "epoch": 0.25305041982049054, "grad_norm": 826.9710693359375, "learning_rate": 9.354423736311813e-06, "loss": 119.1377, "step": 30590 }, { "epoch": 0.2531331430698598, "grad_norm": 1087.69677734375, "learning_rate": 9.353730385598887e-06, "loss": 101.276, "step": 30600 }, { "epoch": 0.25321586631922904, "grad_norm": 567.2242431640625, "learning_rate": 9.353036688479615e-06, "loss": 116.7849, "step": 30610 }, { "epoch": 0.25329858956859824, "grad_norm": 1647.7808837890625, "learning_rate": 9.352342645009193e-06, "loss": 142.3532, "step": 30620 }, { "epoch": 0.2533813128179675, "grad_norm": 1223.5712890625, "learning_rate": 9.35164825524284e-06, "loss": 106.8768, "step": 30630 }, { "epoch": 0.25346403606733675, "grad_norm": 1215.446044921875, "learning_rate": 9.350953519235807e-06, "loss": 142.7279, "step": 30640 }, { "epoch": 0.25354675931670595, "grad_norm": 1093.0865478515625, "learning_rate": 9.35025843704337e-06, "loss": 133.1846, "step": 30650 }, { "epoch": 0.2536294825660752, "grad_norm": 603.0365600585938, "learning_rate": 9.349563008720836e-06, "loss": 143.9578, "step": 30660 }, { "epoch": 0.25371220581544446, "grad_norm": 926.9697265625, "learning_rate": 9.348867234323534e-06, "loss": 115.379, "step": 30670 }, { "epoch": 0.25379492906481366, "grad_norm": 1196.4434814453125, "learning_rate": 9.348171113906826e-06, "loss": 128.1764, "step": 30680 }, { "epoch": 0.2538776523141829, "grad_norm": 750.9150390625, "learning_rate": 9.347474647526095e-06, "loss": 194.924, "step": 30690 }, { "epoch": 0.2539603755635521, "grad_norm": 1341.235595703125, "learning_rate": 9.34677783523676e-06, "loss": 137.8295, "step": 30700 }, { "epoch": 0.25404309881292136, "grad_norm": 1126.3736572265625, "learning_rate": 9.346080677094262e-06, "loss": 132.7227, "step": 30710 }, { "epoch": 0.2541258220622906, "grad_norm": 824.2003784179688, "learning_rate": 9.345383173154072e-06, "loss": 133.4808, "step": 30720 }, { "epoch": 0.2542085453116598, "grad_norm": 929.5851440429688, "learning_rate": 9.344685323471682e-06, "loss": 109.8865, "step": 30730 }, { "epoch": 0.25429126856102907, "grad_norm": 763.9591674804688, "learning_rate": 9.343987128102624e-06, "loss": 114.478, "step": 30740 }, { "epoch": 0.2543739918103983, "grad_norm": 896.4277954101562, "learning_rate": 9.343288587102444e-06, "loss": 139.7716, "step": 30750 }, { "epoch": 0.2544567150597675, "grad_norm": 890.5599975585938, "learning_rate": 9.342589700526725e-06, "loss": 119.8424, "step": 30760 }, { "epoch": 0.2545394383091368, "grad_norm": 954.55322265625, "learning_rate": 9.341890468431072e-06, "loss": 197.9463, "step": 30770 }, { "epoch": 0.25462216155850603, "grad_norm": 962.0372314453125, "learning_rate": 9.341190890871123e-06, "loss": 173.233, "step": 30780 }, { "epoch": 0.25470488480787523, "grad_norm": 1072.42724609375, "learning_rate": 9.340490967902535e-06, "loss": 114.3112, "step": 30790 }, { "epoch": 0.2547876080572445, "grad_norm": 927.2454223632812, "learning_rate": 9.339790699581004e-06, "loss": 98.9923, "step": 30800 }, { "epoch": 0.25487033130661374, "grad_norm": 485.0035400390625, "learning_rate": 9.339090085962244e-06, "loss": 109.2545, "step": 30810 }, { "epoch": 0.25495305455598294, "grad_norm": 1289.7406005859375, "learning_rate": 9.338389127101998e-06, "loss": 137.8362, "step": 30820 }, { "epoch": 0.2550357778053522, "grad_norm": 655.8922119140625, "learning_rate": 9.337687823056041e-06, "loss": 101.9889, "step": 30830 }, { "epoch": 0.25511850105472145, "grad_norm": 1242.1337890625, "learning_rate": 9.336986173880169e-06, "loss": 106.3836, "step": 30840 }, { "epoch": 0.25520122430409065, "grad_norm": 580.3970947265625, "learning_rate": 9.336284179630215e-06, "loss": 94.8493, "step": 30850 }, { "epoch": 0.2552839475534599, "grad_norm": 789.4066162109375, "learning_rate": 9.335581840362026e-06, "loss": 74.5354, "step": 30860 }, { "epoch": 0.25536667080282915, "grad_norm": 1426.3404541015625, "learning_rate": 9.33487915613149e-06, "loss": 108.0914, "step": 30870 }, { "epoch": 0.25544939405219835, "grad_norm": 826.1908569335938, "learning_rate": 9.334176126994512e-06, "loss": 109.946, "step": 30880 }, { "epoch": 0.2555321173015676, "grad_norm": 755.2938232421875, "learning_rate": 9.333472753007031e-06, "loss": 111.354, "step": 30890 }, { "epoch": 0.25561484055093686, "grad_norm": 780.1597290039062, "learning_rate": 9.332769034225012e-06, "loss": 142.2512, "step": 30900 }, { "epoch": 0.25569756380030606, "grad_norm": 731.4649047851562, "learning_rate": 9.332064970704445e-06, "loss": 156.2841, "step": 30910 }, { "epoch": 0.2557802870496753, "grad_norm": 1020.5748291015625, "learning_rate": 9.33136056250135e-06, "loss": 127.7295, "step": 30920 }, { "epoch": 0.25586301029904457, "grad_norm": 698.0821533203125, "learning_rate": 9.330655809671773e-06, "loss": 92.2535, "step": 30930 }, { "epoch": 0.25594573354841377, "grad_norm": 698.9208374023438, "learning_rate": 9.32995071227179e-06, "loss": 124.9385, "step": 30940 }, { "epoch": 0.256028456797783, "grad_norm": 1320.0196533203125, "learning_rate": 9.3292452703575e-06, "loss": 128.1827, "step": 30950 }, { "epoch": 0.2561111800471523, "grad_norm": 2012.84033203125, "learning_rate": 9.328539483985031e-06, "loss": 155.1285, "step": 30960 }, { "epoch": 0.2561939032965215, "grad_norm": 1151.90625, "learning_rate": 9.327833353210541e-06, "loss": 111.3364, "step": 30970 }, { "epoch": 0.25627662654589073, "grad_norm": 1791.881103515625, "learning_rate": 9.327126878090214e-06, "loss": 118.88, "step": 30980 }, { "epoch": 0.25635934979526, "grad_norm": 741.9896850585938, "learning_rate": 9.32642005868026e-06, "loss": 122.3429, "step": 30990 }, { "epoch": 0.2564420730446292, "grad_norm": 712.4248657226562, "learning_rate": 9.325712895036916e-06, "loss": 125.5105, "step": 31000 }, { "epoch": 0.25652479629399844, "grad_norm": 1130.374267578125, "learning_rate": 9.32500538721645e-06, "loss": 86.682, "step": 31010 }, { "epoch": 0.2566075195433677, "grad_norm": 1284.844970703125, "learning_rate": 9.324297535275156e-06, "loss": 114.092, "step": 31020 }, { "epoch": 0.2566902427927369, "grad_norm": 911.753173828125, "learning_rate": 9.323589339269352e-06, "loss": 106.8176, "step": 31030 }, { "epoch": 0.25677296604210614, "grad_norm": 3918.962646484375, "learning_rate": 9.322880799255385e-06, "loss": 160.4931, "step": 31040 }, { "epoch": 0.25685568929147534, "grad_norm": 835.4883422851562, "learning_rate": 9.322171915289635e-06, "loss": 108.218, "step": 31050 }, { "epoch": 0.2569384125408446, "grad_norm": 745.3687133789062, "learning_rate": 9.321462687428499e-06, "loss": 103.9572, "step": 31060 }, { "epoch": 0.25702113579021385, "grad_norm": 2105.262939453125, "learning_rate": 9.320753115728413e-06, "loss": 137.9624, "step": 31070 }, { "epoch": 0.25710385903958305, "grad_norm": 1638.893798828125, "learning_rate": 9.320043200245829e-06, "loss": 76.6734, "step": 31080 }, { "epoch": 0.2571865822889523, "grad_norm": 1251.9464111328125, "learning_rate": 9.319332941037235e-06, "loss": 128.9104, "step": 31090 }, { "epoch": 0.25726930553832156, "grad_norm": 946.0052490234375, "learning_rate": 9.31862233815914e-06, "loss": 83.5885, "step": 31100 }, { "epoch": 0.25735202878769076, "grad_norm": 613.86181640625, "learning_rate": 9.317911391668087e-06, "loss": 88.8766, "step": 31110 }, { "epoch": 0.25743475203706, "grad_norm": 980.6512451171875, "learning_rate": 9.317200101620641e-06, "loss": 111.9333, "step": 31120 }, { "epoch": 0.25751747528642926, "grad_norm": 959.6453857421875, "learning_rate": 9.316488468073397e-06, "loss": 87.5497, "step": 31130 }, { "epoch": 0.25760019853579846, "grad_norm": 784.59033203125, "learning_rate": 9.315776491082973e-06, "loss": 140.7631, "step": 31140 }, { "epoch": 0.2576829217851677, "grad_norm": 1294.2275390625, "learning_rate": 9.315064170706023e-06, "loss": 114.7354, "step": 31150 }, { "epoch": 0.25776564503453697, "grad_norm": 1613.03857421875, "learning_rate": 9.31435150699922e-06, "loss": 123.3567, "step": 31160 }, { "epoch": 0.25784836828390617, "grad_norm": 1174.7305908203125, "learning_rate": 9.313638500019267e-06, "loss": 133.3073, "step": 31170 }, { "epoch": 0.2579310915332754, "grad_norm": 587.86572265625, "learning_rate": 9.312925149822895e-06, "loss": 90.9177, "step": 31180 }, { "epoch": 0.2580138147826447, "grad_norm": 934.0054931640625, "learning_rate": 9.312211456466862e-06, "loss": 127.4864, "step": 31190 }, { "epoch": 0.2580965380320139, "grad_norm": 712.6873779296875, "learning_rate": 9.311497420007955e-06, "loss": 111.8241, "step": 31200 }, { "epoch": 0.25817926128138313, "grad_norm": 1259.027587890625, "learning_rate": 9.310783040502987e-06, "loss": 120.1594, "step": 31210 }, { "epoch": 0.2582619845307524, "grad_norm": 1388.41162109375, "learning_rate": 9.310068318008794e-06, "loss": 121.605, "step": 31220 }, { "epoch": 0.2583447077801216, "grad_norm": 1037.8282470703125, "learning_rate": 9.309353252582246e-06, "loss": 138.8729, "step": 31230 }, { "epoch": 0.25842743102949084, "grad_norm": 943.52490234375, "learning_rate": 9.308637844280236e-06, "loss": 132.2363, "step": 31240 }, { "epoch": 0.2585101542788601, "grad_norm": 1297.0338134765625, "learning_rate": 9.307922093159688e-06, "loss": 113.9879, "step": 31250 }, { "epoch": 0.2585928775282293, "grad_norm": 739.4756469726562, "learning_rate": 9.30720599927755e-06, "loss": 79.2995, "step": 31260 }, { "epoch": 0.25867560077759855, "grad_norm": 1136.6614990234375, "learning_rate": 9.306489562690797e-06, "loss": 148.8123, "step": 31270 }, { "epoch": 0.2587583240269678, "grad_norm": 1102.057861328125, "learning_rate": 9.305772783456435e-06, "loss": 126.115, "step": 31280 }, { "epoch": 0.258841047276337, "grad_norm": 1000.919677734375, "learning_rate": 9.305055661631493e-06, "loss": 128.0628, "step": 31290 }, { "epoch": 0.25892377052570625, "grad_norm": 1486.086669921875, "learning_rate": 9.304338197273029e-06, "loss": 141.9742, "step": 31300 }, { "epoch": 0.2590064937750755, "grad_norm": 1208.7861328125, "learning_rate": 9.303620390438128e-06, "loss": 119.3574, "step": 31310 }, { "epoch": 0.2590892170244447, "grad_norm": 1793.4461669921875, "learning_rate": 9.302902241183905e-06, "loss": 115.7504, "step": 31320 }, { "epoch": 0.25917194027381396, "grad_norm": 1034.2620849609375, "learning_rate": 9.302183749567498e-06, "loss": 104.3807, "step": 31330 }, { "epoch": 0.2592546635231832, "grad_norm": 1191.4996337890625, "learning_rate": 9.301464915646074e-06, "loss": 95.0326, "step": 31340 }, { "epoch": 0.2593373867725524, "grad_norm": 663.1774291992188, "learning_rate": 9.30074573947683e-06, "loss": 105.2758, "step": 31350 }, { "epoch": 0.25942011002192167, "grad_norm": 1111.489501953125, "learning_rate": 9.30002622111698e-06, "loss": 120.7122, "step": 31360 }, { "epoch": 0.2595028332712909, "grad_norm": 1140.2496337890625, "learning_rate": 9.299306360623782e-06, "loss": 111.4868, "step": 31370 }, { "epoch": 0.2595855565206601, "grad_norm": 1038.2596435546875, "learning_rate": 9.298586158054508e-06, "loss": 119.8149, "step": 31380 }, { "epoch": 0.2596682797700294, "grad_norm": 1008.992431640625, "learning_rate": 9.297865613466459e-06, "loss": 145.3494, "step": 31390 }, { "epoch": 0.25975100301939863, "grad_norm": 1030.713623046875, "learning_rate": 9.29714472691697e-06, "loss": 116.2307, "step": 31400 }, { "epoch": 0.25983372626876783, "grad_norm": 1515.673095703125, "learning_rate": 9.296423498463396e-06, "loss": 125.331, "step": 31410 }, { "epoch": 0.2599164495181371, "grad_norm": 1273.73486328125, "learning_rate": 9.29570192816312e-06, "loss": 140.6214, "step": 31420 }, { "epoch": 0.2599991727675063, "grad_norm": 710.17236328125, "learning_rate": 9.29498001607356e-06, "loss": 108.083, "step": 31430 }, { "epoch": 0.26008189601687554, "grad_norm": 900.10107421875, "learning_rate": 9.294257762252148e-06, "loss": 98.9134, "step": 31440 }, { "epoch": 0.2601646192662448, "grad_norm": 875.4248657226562, "learning_rate": 9.293535166756356e-06, "loss": 174.0914, "step": 31450 }, { "epoch": 0.260247342515614, "grad_norm": 1023.4577026367188, "learning_rate": 9.292812229643674e-06, "loss": 96.2018, "step": 31460 }, { "epoch": 0.26033006576498324, "grad_norm": 1871.7161865234375, "learning_rate": 9.292088950971624e-06, "loss": 135.2347, "step": 31470 }, { "epoch": 0.2604127890143525, "grad_norm": 1158.376953125, "learning_rate": 9.291365330797755e-06, "loss": 131.9809, "step": 31480 }, { "epoch": 0.2604955122637217, "grad_norm": 962.1968383789062, "learning_rate": 9.290641369179643e-06, "loss": 109.7965, "step": 31490 }, { "epoch": 0.26057823551309095, "grad_norm": 4627.09521484375, "learning_rate": 9.289917066174887e-06, "loss": 133.032, "step": 31500 }, { "epoch": 0.2606609587624602, "grad_norm": 1069.57177734375, "learning_rate": 9.289192421841116e-06, "loss": 114.866, "step": 31510 }, { "epoch": 0.2607436820118294, "grad_norm": 1533.0517578125, "learning_rate": 9.288467436235992e-06, "loss": 135.5069, "step": 31520 }, { "epoch": 0.26082640526119866, "grad_norm": 876.2843627929688, "learning_rate": 9.287742109417194e-06, "loss": 165.9743, "step": 31530 }, { "epoch": 0.2609091285105679, "grad_norm": 849.820556640625, "learning_rate": 9.287016441442435e-06, "loss": 113.8865, "step": 31540 }, { "epoch": 0.2609918517599371, "grad_norm": 1214.044189453125, "learning_rate": 9.28629043236945e-06, "loss": 129.5201, "step": 31550 }, { "epoch": 0.26107457500930636, "grad_norm": 974.0680541992188, "learning_rate": 9.285564082256011e-06, "loss": 106.2931, "step": 31560 }, { "epoch": 0.2611572982586756, "grad_norm": 591.6702270507812, "learning_rate": 9.284837391159904e-06, "loss": 77.5611, "step": 31570 }, { "epoch": 0.2612400215080448, "grad_norm": 995.6359252929688, "learning_rate": 9.284110359138951e-06, "loss": 169.7267, "step": 31580 }, { "epoch": 0.26132274475741407, "grad_norm": 993.3253784179688, "learning_rate": 9.283382986250997e-06, "loss": 117.8098, "step": 31590 }, { "epoch": 0.2614054680067833, "grad_norm": 720.0477905273438, "learning_rate": 9.282655272553917e-06, "loss": 164.0745, "step": 31600 }, { "epoch": 0.2614881912561525, "grad_norm": 1000.5869750976562, "learning_rate": 9.281927218105613e-06, "loss": 103.8817, "step": 31610 }, { "epoch": 0.2615709145055218, "grad_norm": 701.7263793945312, "learning_rate": 9.281198822964011e-06, "loss": 115.0276, "step": 31620 }, { "epoch": 0.26165363775489103, "grad_norm": 1523.617919921875, "learning_rate": 9.280470087187066e-06, "loss": 150.8629, "step": 31630 }, { "epoch": 0.26173636100426023, "grad_norm": 1352.073486328125, "learning_rate": 9.279741010832761e-06, "loss": 111.8819, "step": 31640 }, { "epoch": 0.2618190842536295, "grad_norm": 784.3294677734375, "learning_rate": 9.279011593959107e-06, "loss": 134.8354, "step": 31650 }, { "epoch": 0.26190180750299874, "grad_norm": 518.8139038085938, "learning_rate": 9.278281836624137e-06, "loss": 109.1452, "step": 31660 }, { "epoch": 0.26198453075236794, "grad_norm": 738.8187255859375, "learning_rate": 9.277551738885915e-06, "loss": 137.3162, "step": 31670 }, { "epoch": 0.2620672540017372, "grad_norm": 1180.13037109375, "learning_rate": 9.276821300802535e-06, "loss": 101.0768, "step": 31680 }, { "epoch": 0.26214997725110645, "grad_norm": 1431.0516357421875, "learning_rate": 9.276090522432109e-06, "loss": 99.368, "step": 31690 }, { "epoch": 0.26223270050047565, "grad_norm": 688.6452026367188, "learning_rate": 9.275359403832787e-06, "loss": 123.8677, "step": 31700 }, { "epoch": 0.2623154237498449, "grad_norm": 854.5988159179688, "learning_rate": 9.274627945062738e-06, "loss": 111.6149, "step": 31710 }, { "epoch": 0.26239814699921415, "grad_norm": 1075.212158203125, "learning_rate": 9.27389614618016e-06, "loss": 138.4687, "step": 31720 }, { "epoch": 0.26248087024858335, "grad_norm": 573.2298583984375, "learning_rate": 9.273164007243281e-06, "loss": 108.3331, "step": 31730 }, { "epoch": 0.2625635934979526, "grad_norm": 990.3570556640625, "learning_rate": 9.272431528310354e-06, "loss": 112.5479, "step": 31740 }, { "epoch": 0.26264631674732186, "grad_norm": 1721.805419921875, "learning_rate": 9.271698709439658e-06, "loss": 140.3488, "step": 31750 }, { "epoch": 0.26272903999669106, "grad_norm": 1286.5728759765625, "learning_rate": 9.2709655506895e-06, "loss": 168.1867, "step": 31760 }, { "epoch": 0.2628117632460603, "grad_norm": 582.025146484375, "learning_rate": 9.270232052118214e-06, "loss": 119.0196, "step": 31770 }, { "epoch": 0.2628944864954295, "grad_norm": 1799.970703125, "learning_rate": 9.26949821378416e-06, "loss": 130.6446, "step": 31780 }, { "epoch": 0.26297720974479877, "grad_norm": 810.4905395507812, "learning_rate": 9.268764035745727e-06, "loss": 123.3437, "step": 31790 }, { "epoch": 0.263059932994168, "grad_norm": 1139.095947265625, "learning_rate": 9.268029518061335e-06, "loss": 138.0163, "step": 31800 }, { "epoch": 0.2631426562435372, "grad_norm": 728.7420654296875, "learning_rate": 9.267294660789417e-06, "loss": 118.9001, "step": 31810 }, { "epoch": 0.2632253794929065, "grad_norm": 937.4639282226562, "learning_rate": 9.26655946398845e-06, "loss": 101.5111, "step": 31820 }, { "epoch": 0.26330810274227573, "grad_norm": 951.8093872070312, "learning_rate": 9.265823927716927e-06, "loss": 114.6193, "step": 31830 }, { "epoch": 0.26339082599164493, "grad_norm": 1131.0379638671875, "learning_rate": 9.26508805203337e-06, "loss": 164.4967, "step": 31840 }, { "epoch": 0.2634735492410142, "grad_norm": 916.825439453125, "learning_rate": 9.264351836996332e-06, "loss": 99.9893, "step": 31850 }, { "epoch": 0.26355627249038344, "grad_norm": 781.8618774414062, "learning_rate": 9.26361528266439e-06, "loss": 147.4806, "step": 31860 }, { "epoch": 0.26363899573975264, "grad_norm": 1081.302001953125, "learning_rate": 9.262878389096147e-06, "loss": 107.9612, "step": 31870 }, { "epoch": 0.2637217189891219, "grad_norm": 1674.4130859375, "learning_rate": 9.262141156350233e-06, "loss": 120.8496, "step": 31880 }, { "epoch": 0.26380444223849114, "grad_norm": 599.2373046875, "learning_rate": 9.261403584485308e-06, "loss": 130.4039, "step": 31890 }, { "epoch": 0.26388716548786034, "grad_norm": 931.727783203125, "learning_rate": 9.260665673560058e-06, "loss": 94.2291, "step": 31900 }, { "epoch": 0.2639698887372296, "grad_norm": 695.3705444335938, "learning_rate": 9.259927423633193e-06, "loss": 173.953, "step": 31910 }, { "epoch": 0.26405261198659885, "grad_norm": 643.5379638671875, "learning_rate": 9.259188834763455e-06, "loss": 91.6798, "step": 31920 }, { "epoch": 0.26413533523596805, "grad_norm": 1540.7181396484375, "learning_rate": 9.258449907009607e-06, "loss": 126.6724, "step": 31930 }, { "epoch": 0.2642180584853373, "grad_norm": 1498.8092041015625, "learning_rate": 9.257710640430444e-06, "loss": 110.8607, "step": 31940 }, { "epoch": 0.26430078173470656, "grad_norm": 626.6985473632812, "learning_rate": 9.256971035084786e-06, "loss": 85.7513, "step": 31950 }, { "epoch": 0.26438350498407576, "grad_norm": 1363.011962890625, "learning_rate": 9.256231091031477e-06, "loss": 101.1794, "step": 31960 }, { "epoch": 0.264466228233445, "grad_norm": 1134.742919921875, "learning_rate": 9.255490808329397e-06, "loss": 212.7933, "step": 31970 }, { "epoch": 0.26454895148281427, "grad_norm": 1176.7347412109375, "learning_rate": 9.254750187037443e-06, "loss": 122.8415, "step": 31980 }, { "epoch": 0.26463167473218346, "grad_norm": 843.20458984375, "learning_rate": 9.254009227214543e-06, "loss": 115.774, "step": 31990 }, { "epoch": 0.2647143979815527, "grad_norm": 990.8587646484375, "learning_rate": 9.253267928919652e-06, "loss": 141.8495, "step": 32000 }, { "epoch": 0.264797121230922, "grad_norm": 1063.32763671875, "learning_rate": 9.25252629221175e-06, "loss": 118.555, "step": 32010 }, { "epoch": 0.26487984448029117, "grad_norm": 1893.1072998046875, "learning_rate": 9.251784317149848e-06, "loss": 122.1342, "step": 32020 }, { "epoch": 0.2649625677296604, "grad_norm": 1173.83349609375, "learning_rate": 9.251042003792983e-06, "loss": 161.0942, "step": 32030 }, { "epoch": 0.2650452909790297, "grad_norm": 927.9155883789062, "learning_rate": 9.250299352200214e-06, "loss": 78.0564, "step": 32040 }, { "epoch": 0.2651280142283989, "grad_norm": 1478.896484375, "learning_rate": 9.249556362430631e-06, "loss": 129.6906, "step": 32050 }, { "epoch": 0.26521073747776813, "grad_norm": 772.536865234375, "learning_rate": 9.248813034543353e-06, "loss": 102.5596, "step": 32060 }, { "epoch": 0.2652934607271374, "grad_norm": 728.9833984375, "learning_rate": 9.24806936859752e-06, "loss": 111.6626, "step": 32070 }, { "epoch": 0.2653761839765066, "grad_norm": 1042.043701171875, "learning_rate": 9.247325364652304e-06, "loss": 132.3886, "step": 32080 }, { "epoch": 0.26545890722587584, "grad_norm": 490.30419921875, "learning_rate": 9.2465810227669e-06, "loss": 128.4408, "step": 32090 }, { "epoch": 0.2655416304752451, "grad_norm": 1035.492919921875, "learning_rate": 9.245836343000534e-06, "loss": 102.5217, "step": 32100 }, { "epoch": 0.2656243537246143, "grad_norm": 1005.1189575195312, "learning_rate": 9.245091325412456e-06, "loss": 112.0046, "step": 32110 }, { "epoch": 0.26570707697398355, "grad_norm": 864.4721069335938, "learning_rate": 9.244345970061944e-06, "loss": 109.3595, "step": 32120 }, { "epoch": 0.26578980022335275, "grad_norm": 803.18896484375, "learning_rate": 9.243600277008301e-06, "loss": 123.4932, "step": 32130 }, { "epoch": 0.265872523472722, "grad_norm": 1339.6492919921875, "learning_rate": 9.24285424631086e-06, "loss": 131.0302, "step": 32140 }, { "epoch": 0.26595524672209125, "grad_norm": 331.6441650390625, "learning_rate": 9.242107878028978e-06, "loss": 84.0776, "step": 32150 }, { "epoch": 0.26603796997146045, "grad_norm": 1174.8720703125, "learning_rate": 9.241361172222043e-06, "loss": 144.0584, "step": 32160 }, { "epoch": 0.2661206932208297, "grad_norm": 955.9444580078125, "learning_rate": 9.240614128949463e-06, "loss": 79.2642, "step": 32170 }, { "epoch": 0.26620341647019896, "grad_norm": 408.5090637207031, "learning_rate": 9.239866748270679e-06, "loss": 80.5909, "step": 32180 }, { "epoch": 0.26628613971956816, "grad_norm": 1054.4498291015625, "learning_rate": 9.239119030245156e-06, "loss": 105.1165, "step": 32190 }, { "epoch": 0.2663688629689374, "grad_norm": 1652.845703125, "learning_rate": 9.238370974932387e-06, "loss": 144.1475, "step": 32200 }, { "epoch": 0.26645158621830667, "grad_norm": 897.6002807617188, "learning_rate": 9.23762258239189e-06, "loss": 85.8102, "step": 32210 }, { "epoch": 0.26653430946767587, "grad_norm": 848.9139404296875, "learning_rate": 9.236873852683213e-06, "loss": 107.4219, "step": 32220 }, { "epoch": 0.2666170327170451, "grad_norm": 900.8207397460938, "learning_rate": 9.23612478586593e-06, "loss": 77.9687, "step": 32230 }, { "epoch": 0.2666997559664144, "grad_norm": 1326.0458984375, "learning_rate": 9.235375381999636e-06, "loss": 106.4991, "step": 32240 }, { "epoch": 0.2667824792157836, "grad_norm": 2422.508056640625, "learning_rate": 9.234625641143962e-06, "loss": 138.341, "step": 32250 }, { "epoch": 0.26686520246515283, "grad_norm": 964.7281494140625, "learning_rate": 9.233875563358559e-06, "loss": 122.4212, "step": 32260 }, { "epoch": 0.2669479257145221, "grad_norm": 1445.0108642578125, "learning_rate": 9.23312514870311e-06, "loss": 107.632, "step": 32270 }, { "epoch": 0.2670306489638913, "grad_norm": 628.1731567382812, "learning_rate": 9.232374397237318e-06, "loss": 118.4748, "step": 32280 }, { "epoch": 0.26711337221326054, "grad_norm": 1020.5704345703125, "learning_rate": 9.231623309020922e-06, "loss": 132.8099, "step": 32290 }, { "epoch": 0.2671960954626298, "grad_norm": 949.1611938476562, "learning_rate": 9.230871884113679e-06, "loss": 128.9596, "step": 32300 }, { "epoch": 0.267278818711999, "grad_norm": 853.702880859375, "learning_rate": 9.230120122575376e-06, "loss": 117.7804, "step": 32310 }, { "epoch": 0.26736154196136824, "grad_norm": 856.49462890625, "learning_rate": 9.22936802446583e-06, "loss": 179.9062, "step": 32320 }, { "epoch": 0.2674442652107375, "grad_norm": 847.5012817382812, "learning_rate": 9.228615589844879e-06, "loss": 84.1749, "step": 32330 }, { "epoch": 0.2675269884601067, "grad_norm": 1034.2694091796875, "learning_rate": 9.227862818772392e-06, "loss": 131.7186, "step": 32340 }, { "epoch": 0.26760971170947595, "grad_norm": 1057.1470947265625, "learning_rate": 9.227109711308265e-06, "loss": 94.973, "step": 32350 }, { "epoch": 0.2676924349588452, "grad_norm": 374.79473876953125, "learning_rate": 9.226356267512417e-06, "loss": 107.7693, "step": 32360 }, { "epoch": 0.2677751582082144, "grad_norm": 817.5911865234375, "learning_rate": 9.225602487444799e-06, "loss": 107.2883, "step": 32370 }, { "epoch": 0.26785788145758366, "grad_norm": 1157.15234375, "learning_rate": 9.224848371165382e-06, "loss": 170.9429, "step": 32380 }, { "epoch": 0.2679406047069529, "grad_norm": 1658.9010009765625, "learning_rate": 9.224093918734172e-06, "loss": 202.5666, "step": 32390 }, { "epoch": 0.2680233279563221, "grad_norm": 1403.2574462890625, "learning_rate": 9.223339130211194e-06, "loss": 113.1494, "step": 32400 }, { "epoch": 0.26810605120569136, "grad_norm": 939.0480346679688, "learning_rate": 9.222584005656501e-06, "loss": 114.9759, "step": 32410 }, { "epoch": 0.2681887744550606, "grad_norm": 578.2918701171875, "learning_rate": 9.22182854513018e-06, "loss": 94.9374, "step": 32420 }, { "epoch": 0.2682714977044298, "grad_norm": 2604.21484375, "learning_rate": 9.221072748692336e-06, "loss": 109.7514, "step": 32430 }, { "epoch": 0.26835422095379907, "grad_norm": 962.8878173828125, "learning_rate": 9.220316616403109e-06, "loss": 104.4484, "step": 32440 }, { "epoch": 0.2684369442031683, "grad_norm": 759.3588256835938, "learning_rate": 9.219560148322655e-06, "loss": 101.3538, "step": 32450 }, { "epoch": 0.2685196674525375, "grad_norm": 1004.7863159179688, "learning_rate": 9.218803344511165e-06, "loss": 134.46, "step": 32460 }, { "epoch": 0.2686023907019068, "grad_norm": 1546.0279541015625, "learning_rate": 9.218046205028854e-06, "loss": 102.795, "step": 32470 }, { "epoch": 0.26868511395127603, "grad_norm": 422.1183776855469, "learning_rate": 9.217288729935966e-06, "loss": 100.5324, "step": 32480 }, { "epoch": 0.26876783720064523, "grad_norm": 953.4854736328125, "learning_rate": 9.216530919292768e-06, "loss": 147.8428, "step": 32490 }, { "epoch": 0.2688505604500145, "grad_norm": 910.6680297851562, "learning_rate": 9.215772773159556e-06, "loss": 138.0076, "step": 32500 }, { "epoch": 0.2689332836993837, "grad_norm": 1041.43505859375, "learning_rate": 9.215014291596653e-06, "loss": 120.8348, "step": 32510 }, { "epoch": 0.26901600694875294, "grad_norm": 1151.9285888671875, "learning_rate": 9.214255474664405e-06, "loss": 150.9121, "step": 32520 }, { "epoch": 0.2690987301981222, "grad_norm": 921.2622680664062, "learning_rate": 9.213496322423193e-06, "loss": 83.8476, "step": 32530 }, { "epoch": 0.2691814534474914, "grad_norm": 411.46826171875, "learning_rate": 9.212736834933413e-06, "loss": 129.1243, "step": 32540 }, { "epoch": 0.26926417669686065, "grad_norm": 3256.450927734375, "learning_rate": 9.211977012255497e-06, "loss": 87.7072, "step": 32550 }, { "epoch": 0.2693468999462299, "grad_norm": 1524.965087890625, "learning_rate": 9.211216854449903e-06, "loss": 128.8632, "step": 32560 }, { "epoch": 0.2694296231955991, "grad_norm": 953.8908081054688, "learning_rate": 9.210456361577109e-06, "loss": 113.7588, "step": 32570 }, { "epoch": 0.26951234644496835, "grad_norm": 1008.9974975585938, "learning_rate": 9.209695533697624e-06, "loss": 122.1501, "step": 32580 }, { "epoch": 0.2695950696943376, "grad_norm": 805.004150390625, "learning_rate": 9.208934370871989e-06, "loss": 113.7434, "step": 32590 }, { "epoch": 0.2696777929437068, "grad_norm": 636.7761840820312, "learning_rate": 9.20817287316076e-06, "loss": 113.1953, "step": 32600 }, { "epoch": 0.26976051619307606, "grad_norm": 763.083984375, "learning_rate": 9.20741104062453e-06, "loss": 101.9306, "step": 32610 }, { "epoch": 0.2698432394424453, "grad_norm": 1067.39208984375, "learning_rate": 9.206648873323912e-06, "loss": 104.6595, "step": 32620 }, { "epoch": 0.2699259626918145, "grad_norm": 1282.15576171875, "learning_rate": 9.205886371319548e-06, "loss": 112.2921, "step": 32630 }, { "epoch": 0.27000868594118377, "grad_norm": 1269.3165283203125, "learning_rate": 9.20512353467211e-06, "loss": 101.4175, "step": 32640 }, { "epoch": 0.270091409190553, "grad_norm": 1416.7222900390625, "learning_rate": 9.204360363442288e-06, "loss": 94.3014, "step": 32650 }, { "epoch": 0.2701741324399222, "grad_norm": 577.2730712890625, "learning_rate": 9.20359685769081e-06, "loss": 178.5912, "step": 32660 }, { "epoch": 0.2702568556892915, "grad_norm": 976.02880859375, "learning_rate": 9.202833017478421e-06, "loss": 142.0586, "step": 32670 }, { "epoch": 0.27033957893866073, "grad_norm": 1126.535888671875, "learning_rate": 9.2020688428659e-06, "loss": 100.6238, "step": 32680 }, { "epoch": 0.27042230218802993, "grad_norm": 945.1123657226562, "learning_rate": 9.201304333914042e-06, "loss": 124.4394, "step": 32690 }, { "epoch": 0.2705050254373992, "grad_norm": 600.9089965820312, "learning_rate": 9.200539490683682e-06, "loss": 93.1028, "step": 32700 }, { "epoch": 0.27058774868676844, "grad_norm": 704.4324951171875, "learning_rate": 9.19977431323567e-06, "loss": 124.6708, "step": 32710 }, { "epoch": 0.27067047193613764, "grad_norm": 938.8729858398438, "learning_rate": 9.199008801630893e-06, "loss": 119.1146, "step": 32720 }, { "epoch": 0.2707531951855069, "grad_norm": 1016.2319946289062, "learning_rate": 9.198242955930257e-06, "loss": 126.7218, "step": 32730 }, { "epoch": 0.27083591843487614, "grad_norm": 832.0853881835938, "learning_rate": 9.197476776194693e-06, "loss": 102.6724, "step": 32740 }, { "epoch": 0.27091864168424534, "grad_norm": 1260.37548828125, "learning_rate": 9.196710262485168e-06, "loss": 107.5099, "step": 32750 }, { "epoch": 0.2710013649336146, "grad_norm": 840.14990234375, "learning_rate": 9.195943414862667e-06, "loss": 124.9764, "step": 32760 }, { "epoch": 0.27108408818298385, "grad_norm": 486.6063537597656, "learning_rate": 9.195176233388206e-06, "loss": 92.0499, "step": 32770 }, { "epoch": 0.27116681143235305, "grad_norm": 907.8724975585938, "learning_rate": 9.194408718122825e-06, "loss": 120.6719, "step": 32780 }, { "epoch": 0.2712495346817223, "grad_norm": 572.4683837890625, "learning_rate": 9.193640869127592e-06, "loss": 124.2721, "step": 32790 }, { "epoch": 0.27133225793109156, "grad_norm": 957.0466918945312, "learning_rate": 9.192872686463601e-06, "loss": 131.9941, "step": 32800 }, { "epoch": 0.27141498118046076, "grad_norm": 691.4140014648438, "learning_rate": 9.192104170191973e-06, "loss": 101.0082, "step": 32810 }, { "epoch": 0.27149770442983, "grad_norm": 791.294677734375, "learning_rate": 9.191335320373856e-06, "loss": 138.9451, "step": 32820 }, { "epoch": 0.27158042767919927, "grad_norm": 1051.594482421875, "learning_rate": 9.190566137070422e-06, "loss": 107.2597, "step": 32830 }, { "epoch": 0.27166315092856846, "grad_norm": 1549.462158203125, "learning_rate": 9.189796620342875e-06, "loss": 153.74, "step": 32840 }, { "epoch": 0.2717458741779377, "grad_norm": 751.6979370117188, "learning_rate": 9.189026770252437e-06, "loss": 108.3263, "step": 32850 }, { "epoch": 0.2718285974273069, "grad_norm": 4171.58203125, "learning_rate": 9.188256586860365e-06, "loss": 177.8506, "step": 32860 }, { "epoch": 0.27191132067667617, "grad_norm": 880.1036987304688, "learning_rate": 9.187486070227938e-06, "loss": 124.1478, "step": 32870 }, { "epoch": 0.2719940439260454, "grad_norm": 875.8194580078125, "learning_rate": 9.186715220416463e-06, "loss": 80.9601, "step": 32880 }, { "epoch": 0.2720767671754146, "grad_norm": 517.209716796875, "learning_rate": 9.185944037487271e-06, "loss": 105.6458, "step": 32890 }, { "epoch": 0.2721594904247839, "grad_norm": 783.9718627929688, "learning_rate": 9.185172521501723e-06, "loss": 103.928, "step": 32900 }, { "epoch": 0.27224221367415313, "grad_norm": 809.2305297851562, "learning_rate": 9.184400672521204e-06, "loss": 119.6438, "step": 32910 }, { "epoch": 0.27232493692352233, "grad_norm": 847.0447998046875, "learning_rate": 9.183628490607129e-06, "loss": 118.6409, "step": 32920 }, { "epoch": 0.2724076601728916, "grad_norm": 803.6466674804688, "learning_rate": 9.182855975820934e-06, "loss": 86.8706, "step": 32930 }, { "epoch": 0.27249038342226084, "grad_norm": 1112.55322265625, "learning_rate": 9.182083128224086e-06, "loss": 108.3938, "step": 32940 }, { "epoch": 0.27257310667163004, "grad_norm": 758.2708740234375, "learning_rate": 9.181309947878077e-06, "loss": 136.1542, "step": 32950 }, { "epoch": 0.2726558299209993, "grad_norm": 655.5614013671875, "learning_rate": 9.180536434844426e-06, "loss": 93.3358, "step": 32960 }, { "epoch": 0.27273855317036855, "grad_norm": 746.388671875, "learning_rate": 9.179762589184676e-06, "loss": 137.545, "step": 32970 }, { "epoch": 0.27282127641973775, "grad_norm": 919.367431640625, "learning_rate": 9.1789884109604e-06, "loss": 105.9232, "step": 32980 }, { "epoch": 0.272903999669107, "grad_norm": 1165.487548828125, "learning_rate": 9.178213900233193e-06, "loss": 119.8975, "step": 32990 }, { "epoch": 0.27298672291847625, "grad_norm": 1274.4398193359375, "learning_rate": 9.177439057064684e-06, "loss": 108.2796, "step": 33000 }, { "epoch": 0.27306944616784545, "grad_norm": 2099.3134765625, "learning_rate": 9.17666388151652e-06, "loss": 88.9919, "step": 33010 }, { "epoch": 0.2731521694172147, "grad_norm": 1373.3428955078125, "learning_rate": 9.175888373650377e-06, "loss": 109.9396, "step": 33020 }, { "epoch": 0.27323489266658396, "grad_norm": 1429.390869140625, "learning_rate": 9.175112533527963e-06, "loss": 94.565, "step": 33030 }, { "epoch": 0.27331761591595316, "grad_norm": 1118.853271484375, "learning_rate": 9.174336361211007e-06, "loss": 101.014, "step": 33040 }, { "epoch": 0.2734003391653224, "grad_norm": 2152.85107421875, "learning_rate": 9.173559856761262e-06, "loss": 153.9467, "step": 33050 }, { "epoch": 0.27348306241469167, "grad_norm": 909.071533203125, "learning_rate": 9.172783020240514e-06, "loss": 102.0454, "step": 33060 }, { "epoch": 0.27356578566406087, "grad_norm": 890.9006958007812, "learning_rate": 9.172005851710573e-06, "loss": 130.9717, "step": 33070 }, { "epoch": 0.2736485089134301, "grad_norm": 1000.61279296875, "learning_rate": 9.171228351233272e-06, "loss": 150.3027, "step": 33080 }, { "epoch": 0.2737312321627994, "grad_norm": 1430.8470458984375, "learning_rate": 9.170450518870475e-06, "loss": 149.3742, "step": 33090 }, { "epoch": 0.2738139554121686, "grad_norm": 1026.9654541015625, "learning_rate": 9.169672354684069e-06, "loss": 123.5882, "step": 33100 }, { "epoch": 0.27389667866153783, "grad_norm": 841.4974975585938, "learning_rate": 9.168893858735972e-06, "loss": 92.7002, "step": 33110 }, { "epoch": 0.2739794019109071, "grad_norm": 1173.48681640625, "learning_rate": 9.168115031088122e-06, "loss": 89.6682, "step": 33120 }, { "epoch": 0.2740621251602763, "grad_norm": 733.5807495117188, "learning_rate": 9.167335871802488e-06, "loss": 86.3547, "step": 33130 }, { "epoch": 0.27414484840964554, "grad_norm": 1434.056640625, "learning_rate": 9.166556380941063e-06, "loss": 125.5328, "step": 33140 }, { "epoch": 0.2742275716590148, "grad_norm": 820.1240844726562, "learning_rate": 9.16577655856587e-06, "loss": 130.4958, "step": 33150 }, { "epoch": 0.274310294908384, "grad_norm": 840.7511596679688, "learning_rate": 9.164996404738955e-06, "loss": 160.6511, "step": 33160 }, { "epoch": 0.27439301815775324, "grad_norm": 915.5698852539062, "learning_rate": 9.16421591952239e-06, "loss": 127.9161, "step": 33170 }, { "epoch": 0.2744757414071225, "grad_norm": 1336.1126708984375, "learning_rate": 9.163435102978276e-06, "loss": 122.7304, "step": 33180 }, { "epoch": 0.2745584646564917, "grad_norm": 982.8937377929688, "learning_rate": 9.162653955168739e-06, "loss": 118.9783, "step": 33190 }, { "epoch": 0.27464118790586095, "grad_norm": 756.2153930664062, "learning_rate": 9.161872476155929e-06, "loss": 101.5269, "step": 33200 }, { "epoch": 0.2747239111552302, "grad_norm": 732.1403198242188, "learning_rate": 9.161090666002029e-06, "loss": 124.4535, "step": 33210 }, { "epoch": 0.2748066344045994, "grad_norm": 753.6777954101562, "learning_rate": 9.16030852476924e-06, "loss": 109.3473, "step": 33220 }, { "epoch": 0.27488935765396866, "grad_norm": 822.023681640625, "learning_rate": 9.159526052519794e-06, "loss": 120.7444, "step": 33230 }, { "epoch": 0.27497208090333786, "grad_norm": 925.0752563476562, "learning_rate": 9.15874324931595e-06, "loss": 100.8344, "step": 33240 }, { "epoch": 0.2750548041527071, "grad_norm": 686.9509887695312, "learning_rate": 9.157960115219993e-06, "loss": 113.704, "step": 33250 }, { "epoch": 0.27513752740207637, "grad_norm": 996.56787109375, "learning_rate": 9.157176650294231e-06, "loss": 133.5279, "step": 33260 }, { "epoch": 0.27522025065144556, "grad_norm": 728.7750244140625, "learning_rate": 9.156392854601001e-06, "loss": 133.3526, "step": 33270 }, { "epoch": 0.2753029739008148, "grad_norm": 1302.504638671875, "learning_rate": 9.155608728202669e-06, "loss": 113.5402, "step": 33280 }, { "epoch": 0.2753856971501841, "grad_norm": 857.5109252929688, "learning_rate": 9.154824271161621e-06, "loss": 83.4826, "step": 33290 }, { "epoch": 0.27546842039955327, "grad_norm": 910.7625122070312, "learning_rate": 9.154039483540273e-06, "loss": 110.0397, "step": 33300 }, { "epoch": 0.2755511436489225, "grad_norm": 529.6849365234375, "learning_rate": 9.153254365401069e-06, "loss": 125.4888, "step": 33310 }, { "epoch": 0.2756338668982918, "grad_norm": 879.7425537109375, "learning_rate": 9.152468916806477e-06, "loss": 103.2796, "step": 33320 }, { "epoch": 0.275716590147661, "grad_norm": 1299.931884765625, "learning_rate": 9.151683137818989e-06, "loss": 119.4664, "step": 33330 }, { "epoch": 0.27579931339703023, "grad_norm": 698.8023681640625, "learning_rate": 9.150897028501126e-06, "loss": 100.8363, "step": 33340 }, { "epoch": 0.2758820366463995, "grad_norm": 1195.03466796875, "learning_rate": 9.15011058891544e-06, "loss": 119.996, "step": 33350 }, { "epoch": 0.2759647598957687, "grad_norm": 1072.2655029296875, "learning_rate": 9.149323819124498e-06, "loss": 113.2403, "step": 33360 }, { "epoch": 0.27604748314513794, "grad_norm": 897.378662109375, "learning_rate": 9.148536719190904e-06, "loss": 131.1827, "step": 33370 }, { "epoch": 0.2761302063945072, "grad_norm": 900.53955078125, "learning_rate": 9.147749289177282e-06, "loss": 141.6734, "step": 33380 }, { "epoch": 0.2762129296438764, "grad_norm": 809.6741333007812, "learning_rate": 9.146961529146285e-06, "loss": 99.829, "step": 33390 }, { "epoch": 0.27629565289324565, "grad_norm": 757.1586303710938, "learning_rate": 9.146173439160591e-06, "loss": 117.2545, "step": 33400 }, { "epoch": 0.2763783761426149, "grad_norm": 763.6544799804688, "learning_rate": 9.145385019282904e-06, "loss": 135.9243, "step": 33410 }, { "epoch": 0.2764610993919841, "grad_norm": 1427.904296875, "learning_rate": 9.144596269575957e-06, "loss": 125.229, "step": 33420 }, { "epoch": 0.27654382264135335, "grad_norm": 1012.0225219726562, "learning_rate": 9.143807190102504e-06, "loss": 126.2279, "step": 33430 }, { "epoch": 0.2766265458907226, "grad_norm": 2056.858154296875, "learning_rate": 9.143017780925331e-06, "loss": 153.6504, "step": 33440 }, { "epoch": 0.2767092691400918, "grad_norm": 1273.417236328125, "learning_rate": 9.142228042107248e-06, "loss": 109.7093, "step": 33450 }, { "epoch": 0.27679199238946106, "grad_norm": 1077.05126953125, "learning_rate": 9.141437973711092e-06, "loss": 106.108, "step": 33460 }, { "epoch": 0.2768747156388303, "grad_norm": 1240.419189453125, "learning_rate": 9.14064757579972e-06, "loss": 95.5216, "step": 33470 }, { "epoch": 0.2769574388881995, "grad_norm": 1002.42724609375, "learning_rate": 9.139856848436023e-06, "loss": 117.1653, "step": 33480 }, { "epoch": 0.27704016213756877, "grad_norm": 1525.692626953125, "learning_rate": 9.139065791682916e-06, "loss": 161.3095, "step": 33490 }, { "epoch": 0.277122885386938, "grad_norm": 773.1032104492188, "learning_rate": 9.138274405603342e-06, "loss": 168.1776, "step": 33500 }, { "epoch": 0.2772056086363072, "grad_norm": 768.6693115234375, "learning_rate": 9.137482690260265e-06, "loss": 156.8843, "step": 33510 }, { "epoch": 0.2772883318856765, "grad_norm": 307.3286437988281, "learning_rate": 9.13669064571668e-06, "loss": 109.4442, "step": 33520 }, { "epoch": 0.27737105513504573, "grad_norm": 3717.694580078125, "learning_rate": 9.135898272035601e-06, "loss": 167.7826, "step": 33530 }, { "epoch": 0.27745377838441493, "grad_norm": 986.0308227539062, "learning_rate": 9.13510556928008e-06, "loss": 87.5478, "step": 33540 }, { "epoch": 0.2775365016337842, "grad_norm": 652.1424560546875, "learning_rate": 9.134312537513188e-06, "loss": 106.2764, "step": 33550 }, { "epoch": 0.27761922488315344, "grad_norm": 720.5858154296875, "learning_rate": 9.133519176798021e-06, "loss": 152.1906, "step": 33560 }, { "epoch": 0.27770194813252264, "grad_norm": 1833.9097900390625, "learning_rate": 9.132725487197701e-06, "loss": 116.5092, "step": 33570 }, { "epoch": 0.2777846713818919, "grad_norm": 1374.428955078125, "learning_rate": 9.131931468775382e-06, "loss": 132.6865, "step": 33580 }, { "epoch": 0.2778673946312611, "grad_norm": 724.4324340820312, "learning_rate": 9.131137121594239e-06, "loss": 127.3931, "step": 33590 }, { "epoch": 0.27795011788063034, "grad_norm": 594.5848999023438, "learning_rate": 9.130342445717474e-06, "loss": 117.9793, "step": 33600 }, { "epoch": 0.2780328411299996, "grad_norm": 1347.4599609375, "learning_rate": 9.129547441208317e-06, "loss": 123.5553, "step": 33610 }, { "epoch": 0.2781155643793688, "grad_norm": 825.2183837890625, "learning_rate": 9.128752108130022e-06, "loss": 109.409, "step": 33620 }, { "epoch": 0.27819828762873805, "grad_norm": 952.2738037109375, "learning_rate": 9.12795644654587e-06, "loss": 149.2746, "step": 33630 }, { "epoch": 0.2782810108781073, "grad_norm": 6804.1904296875, "learning_rate": 9.127160456519168e-06, "loss": 132.9842, "step": 33640 }, { "epoch": 0.2783637341274765, "grad_norm": 953.0177612304688, "learning_rate": 9.126364138113251e-06, "loss": 119.1077, "step": 33650 }, { "epoch": 0.27844645737684576, "grad_norm": 771.2464599609375, "learning_rate": 9.125567491391476e-06, "loss": 117.123, "step": 33660 }, { "epoch": 0.278529180626215, "grad_norm": 1822.6439208984375, "learning_rate": 9.12477051641723e-06, "loss": 151.1186, "step": 33670 }, { "epoch": 0.2786119038755842, "grad_norm": 872.4586791992188, "learning_rate": 9.123973213253923e-06, "loss": 112.0873, "step": 33680 }, { "epoch": 0.27869462712495346, "grad_norm": 494.63134765625, "learning_rate": 9.123175581964995e-06, "loss": 100.9059, "step": 33690 }, { "epoch": 0.2787773503743227, "grad_norm": 959.332275390625, "learning_rate": 9.122377622613909e-06, "loss": 106.5335, "step": 33700 }, { "epoch": 0.2788600736236919, "grad_norm": 886.007568359375, "learning_rate": 9.121579335264155e-06, "loss": 128.5588, "step": 33710 }, { "epoch": 0.27894279687306117, "grad_norm": 1174.72265625, "learning_rate": 9.120780719979248e-06, "loss": 92.5889, "step": 33720 }, { "epoch": 0.2790255201224304, "grad_norm": 2207.4111328125, "learning_rate": 9.11998177682273e-06, "loss": 132.5646, "step": 33730 }, { "epoch": 0.2791082433717996, "grad_norm": 730.7549438476562, "learning_rate": 9.11918250585817e-06, "loss": 82.5129, "step": 33740 }, { "epoch": 0.2791909666211689, "grad_norm": 876.688232421875, "learning_rate": 9.118382907149164e-06, "loss": 109.9412, "step": 33750 }, { "epoch": 0.27927368987053813, "grad_norm": 831.474609375, "learning_rate": 9.117582980759332e-06, "loss": 124.5468, "step": 33760 }, { "epoch": 0.27935641311990733, "grad_norm": 820.8474731445312, "learning_rate": 9.116782726752317e-06, "loss": 126.4644, "step": 33770 }, { "epoch": 0.2794391363692766, "grad_norm": 921.0737915039062, "learning_rate": 9.115982145191796e-06, "loss": 116.8273, "step": 33780 }, { "epoch": 0.27952185961864584, "grad_norm": 1426.6785888671875, "learning_rate": 9.115181236141463e-06, "loss": 127.0457, "step": 33790 }, { "epoch": 0.27960458286801504, "grad_norm": 1093.164794921875, "learning_rate": 9.114379999665047e-06, "loss": 85.9147, "step": 33800 }, { "epoch": 0.2796873061173843, "grad_norm": 1163.352783203125, "learning_rate": 9.113578435826295e-06, "loss": 140.9147, "step": 33810 }, { "epoch": 0.27977002936675355, "grad_norm": 412.13787841796875, "learning_rate": 9.112776544688988e-06, "loss": 127.3203, "step": 33820 }, { "epoch": 0.27985275261612275, "grad_norm": 785.0323486328125, "learning_rate": 9.111974326316926e-06, "loss": 103.5417, "step": 33830 }, { "epoch": 0.279935475865492, "grad_norm": 1045.812255859375, "learning_rate": 9.111171780773938e-06, "loss": 120.1187, "step": 33840 }, { "epoch": 0.28001819911486125, "grad_norm": 916.0906372070312, "learning_rate": 9.110368908123878e-06, "loss": 139.0241, "step": 33850 }, { "epoch": 0.28010092236423045, "grad_norm": 1398.31982421875, "learning_rate": 9.10956570843063e-06, "loss": 132.5252, "step": 33860 }, { "epoch": 0.2801836456135997, "grad_norm": 1527.2587890625, "learning_rate": 9.108762181758096e-06, "loss": 130.4855, "step": 33870 }, { "epoch": 0.28026636886296896, "grad_norm": 751.3243408203125, "learning_rate": 9.107958328170215e-06, "loss": 106.5942, "step": 33880 }, { "epoch": 0.28034909211233816, "grad_norm": 1024.8853759765625, "learning_rate": 9.10715414773094e-06, "loss": 109.6669, "step": 33890 }, { "epoch": 0.2804318153617074, "grad_norm": 1099.5294189453125, "learning_rate": 9.10634964050426e-06, "loss": 119.2499, "step": 33900 }, { "epoch": 0.28051453861107667, "grad_norm": 775.4506225585938, "learning_rate": 9.105544806554184e-06, "loss": 157.9587, "step": 33910 }, { "epoch": 0.28059726186044587, "grad_norm": 1175.566162109375, "learning_rate": 9.104739645944752e-06, "loss": 179.6702, "step": 33920 }, { "epoch": 0.2806799851098151, "grad_norm": 1368.4671630859375, "learning_rate": 9.103934158740023e-06, "loss": 129.1513, "step": 33930 }, { "epoch": 0.2807627083591844, "grad_norm": 1385.238037109375, "learning_rate": 9.10312834500409e-06, "loss": 150.3397, "step": 33940 }, { "epoch": 0.2808454316085536, "grad_norm": 1050.946044921875, "learning_rate": 9.102322204801062e-06, "loss": 118.2614, "step": 33950 }, { "epoch": 0.28092815485792283, "grad_norm": 2881.64013671875, "learning_rate": 9.101515738195084e-06, "loss": 100.6495, "step": 33960 }, { "epoch": 0.28101087810729203, "grad_norm": 394.7975769042969, "learning_rate": 9.100708945250322e-06, "loss": 81.3734, "step": 33970 }, { "epoch": 0.2810936013566613, "grad_norm": 1190.456298828125, "learning_rate": 9.099901826030969e-06, "loss": 130.64, "step": 33980 }, { "epoch": 0.28117632460603054, "grad_norm": 1374.1256103515625, "learning_rate": 9.099094380601244e-06, "loss": 119.4305, "step": 33990 }, { "epoch": 0.28125904785539974, "grad_norm": 1412.7530517578125, "learning_rate": 9.098286609025392e-06, "loss": 106.0938, "step": 34000 }, { "epoch": 0.281341771104769, "grad_norm": 617.6422119140625, "learning_rate": 9.097478511367682e-06, "loss": 119.852, "step": 34010 }, { "epoch": 0.28142449435413824, "grad_norm": 857.2781982421875, "learning_rate": 9.096670087692413e-06, "loss": 168.8287, "step": 34020 }, { "epoch": 0.28150721760350744, "grad_norm": 0.0, "learning_rate": 9.095861338063906e-06, "loss": 99.5146, "step": 34030 }, { "epoch": 0.2815899408528767, "grad_norm": 1164.4888916015625, "learning_rate": 9.09505226254651e-06, "loss": 139.5455, "step": 34040 }, { "epoch": 0.28167266410224595, "grad_norm": 1813.4522705078125, "learning_rate": 9.094242861204598e-06, "loss": 153.4502, "step": 34050 }, { "epoch": 0.28175538735161515, "grad_norm": 918.6724243164062, "learning_rate": 9.093433134102572e-06, "loss": 95.0513, "step": 34060 }, { "epoch": 0.2818381106009844, "grad_norm": 758.8914794921875, "learning_rate": 9.09262308130486e-06, "loss": 127.6234, "step": 34070 }, { "epoch": 0.28192083385035366, "grad_norm": 1214.897705078125, "learning_rate": 9.091812702875908e-06, "loss": 135.7131, "step": 34080 }, { "epoch": 0.28200355709972286, "grad_norm": 1762.4208984375, "learning_rate": 9.0910019988802e-06, "loss": 116.575, "step": 34090 }, { "epoch": 0.2820862803490921, "grad_norm": 1027.57958984375, "learning_rate": 9.09019096938224e-06, "loss": 115.0406, "step": 34100 }, { "epoch": 0.28216900359846137, "grad_norm": 549.1436157226562, "learning_rate": 9.089379614446554e-06, "loss": 101.1465, "step": 34110 }, { "epoch": 0.28225172684783056, "grad_norm": 1149.4486083984375, "learning_rate": 9.0885679341377e-06, "loss": 111.7543, "step": 34120 }, { "epoch": 0.2823344500971998, "grad_norm": 827.5548095703125, "learning_rate": 9.08775592852026e-06, "loss": 84.8386, "step": 34130 }, { "epoch": 0.2824171733465691, "grad_norm": 2282.6884765625, "learning_rate": 9.08694359765884e-06, "loss": 122.2059, "step": 34140 }, { "epoch": 0.28249989659593827, "grad_norm": 1563.5760498046875, "learning_rate": 9.086130941618075e-06, "loss": 127.1692, "step": 34150 }, { "epoch": 0.2825826198453075, "grad_norm": 546.21337890625, "learning_rate": 9.085317960462625e-06, "loss": 91.0043, "step": 34160 }, { "epoch": 0.2826653430946768, "grad_norm": 1092.4716796875, "learning_rate": 9.084504654257173e-06, "loss": 126.0462, "step": 34170 }, { "epoch": 0.282748066344046, "grad_norm": 1006.257568359375, "learning_rate": 9.08369102306643e-06, "loss": 104.315, "step": 34180 }, { "epoch": 0.28283078959341523, "grad_norm": 1649.3040771484375, "learning_rate": 9.082877066955135e-06, "loss": 101.4608, "step": 34190 }, { "epoch": 0.2829135128427845, "grad_norm": 1023.9491577148438, "learning_rate": 9.08206278598805e-06, "loss": 133.0885, "step": 34200 }, { "epoch": 0.2829962360921537, "grad_norm": 1013.92041015625, "learning_rate": 9.081248180229963e-06, "loss": 93.8945, "step": 34210 }, { "epoch": 0.28307895934152294, "grad_norm": 655.7772827148438, "learning_rate": 9.080433249745688e-06, "loss": 104.3141, "step": 34220 }, { "epoch": 0.2831616825908922, "grad_norm": 1191.8131103515625, "learning_rate": 9.079617994600066e-06, "loss": 147.7726, "step": 34230 }, { "epoch": 0.2832444058402614, "grad_norm": 628.782470703125, "learning_rate": 9.078802414857963e-06, "loss": 96.9786, "step": 34240 }, { "epoch": 0.28332712908963065, "grad_norm": 695.0119018554688, "learning_rate": 9.077986510584273e-06, "loss": 111.2695, "step": 34250 }, { "epoch": 0.2834098523389999, "grad_norm": 1148.8243408203125, "learning_rate": 9.07717028184391e-06, "loss": 104.8876, "step": 34260 }, { "epoch": 0.2834925755883691, "grad_norm": 766.49072265625, "learning_rate": 9.07635372870182e-06, "loss": 84.2609, "step": 34270 }, { "epoch": 0.28357529883773835, "grad_norm": 1150.672119140625, "learning_rate": 9.07553685122297e-06, "loss": 146.0944, "step": 34280 }, { "epoch": 0.2836580220871076, "grad_norm": 1328.8944091796875, "learning_rate": 9.074719649472358e-06, "loss": 115.0404, "step": 34290 }, { "epoch": 0.2837407453364768, "grad_norm": 1080.066650390625, "learning_rate": 9.073902123515005e-06, "loss": 144.9194, "step": 34300 }, { "epoch": 0.28382346858584606, "grad_norm": 676.450439453125, "learning_rate": 9.073084273415956e-06, "loss": 79.431, "step": 34310 }, { "epoch": 0.28390619183521526, "grad_norm": 1445.7745361328125, "learning_rate": 9.072266099240286e-06, "loss": 99.8113, "step": 34320 }, { "epoch": 0.2839889150845845, "grad_norm": 739.0885009765625, "learning_rate": 9.07144760105309e-06, "loss": 142.6431, "step": 34330 }, { "epoch": 0.28407163833395377, "grad_norm": 920.0739135742188, "learning_rate": 9.070628778919493e-06, "loss": 93.1577, "step": 34340 }, { "epoch": 0.28415436158332297, "grad_norm": 1188.843017578125, "learning_rate": 9.069809632904647e-06, "loss": 111.0898, "step": 34350 }, { "epoch": 0.2842370848326922, "grad_norm": 991.0109252929688, "learning_rate": 9.068990163073726e-06, "loss": 87.4462, "step": 34360 }, { "epoch": 0.2843198080820615, "grad_norm": 845.0187377929688, "learning_rate": 9.068170369491932e-06, "loss": 95.4965, "step": 34370 }, { "epoch": 0.2844025313314307, "grad_norm": 861.9247436523438, "learning_rate": 9.067350252224491e-06, "loss": 76.6258, "step": 34380 }, { "epoch": 0.28448525458079993, "grad_norm": 1371.8582763671875, "learning_rate": 9.066529811336658e-06, "loss": 117.6796, "step": 34390 }, { "epoch": 0.2845679778301692, "grad_norm": 909.2398681640625, "learning_rate": 9.06570904689371e-06, "loss": 96.8476, "step": 34400 }, { "epoch": 0.2846507010795384, "grad_norm": 941.552978515625, "learning_rate": 9.064887958960953e-06, "loss": 89.4982, "step": 34410 }, { "epoch": 0.28473342432890764, "grad_norm": 2689.89404296875, "learning_rate": 9.064066547603716e-06, "loss": 116.0555, "step": 34420 }, { "epoch": 0.2848161475782769, "grad_norm": 1277.172607421875, "learning_rate": 9.063244812887357e-06, "loss": 111.9663, "step": 34430 }, { "epoch": 0.2848988708276461, "grad_norm": 872.6428833007812, "learning_rate": 9.062422754877253e-06, "loss": 124.443, "step": 34440 }, { "epoch": 0.28498159407701534, "grad_norm": 698.310546875, "learning_rate": 9.061600373638816e-06, "loss": 127.376, "step": 34450 }, { "epoch": 0.2850643173263846, "grad_norm": 914.8101806640625, "learning_rate": 9.06077766923748e-06, "loss": 113.9157, "step": 34460 }, { "epoch": 0.2851470405757538, "grad_norm": 1122.4351806640625, "learning_rate": 9.059954641738697e-06, "loss": 126.3374, "step": 34470 }, { "epoch": 0.28522976382512305, "grad_norm": 1123.8736572265625, "learning_rate": 9.059131291207958e-06, "loss": 105.3611, "step": 34480 }, { "epoch": 0.2853124870744923, "grad_norm": 656.4722290039062, "learning_rate": 9.058307617710771e-06, "loss": 142.5772, "step": 34490 }, { "epoch": 0.2853952103238615, "grad_norm": 608.4116821289062, "learning_rate": 9.057483621312671e-06, "loss": 115.3732, "step": 34500 }, { "epoch": 0.28547793357323076, "grad_norm": 581.153564453125, "learning_rate": 9.056659302079222e-06, "loss": 118.1142, "step": 34510 }, { "epoch": 0.2855606568226, "grad_norm": 1226.16552734375, "learning_rate": 9.055834660076008e-06, "loss": 110.4029, "step": 34520 }, { "epoch": 0.2856433800719692, "grad_norm": 1013.7431640625, "learning_rate": 9.055009695368646e-06, "loss": 135.0033, "step": 34530 }, { "epoch": 0.28572610332133846, "grad_norm": 749.7244262695312, "learning_rate": 9.054184408022772e-06, "loss": 157.7157, "step": 34540 }, { "epoch": 0.2858088265707077, "grad_norm": 456.33026123046875, "learning_rate": 9.05335879810405e-06, "loss": 145.5047, "step": 34550 }, { "epoch": 0.2858915498200769, "grad_norm": 622.3046264648438, "learning_rate": 9.052532865678171e-06, "loss": 123.2549, "step": 34560 }, { "epoch": 0.28597427306944617, "grad_norm": 1478.7266845703125, "learning_rate": 9.05170661081085e-06, "loss": 127.2245, "step": 34570 }, { "epoch": 0.2860569963188154, "grad_norm": 619.5127563476562, "learning_rate": 9.050880033567831e-06, "loss": 132.0401, "step": 34580 }, { "epoch": 0.2861397195681846, "grad_norm": 1351.7064208984375, "learning_rate": 9.050053134014878e-06, "loss": 100.6227, "step": 34590 }, { "epoch": 0.2862224428175539, "grad_norm": 1285.1185302734375, "learning_rate": 9.049225912217782e-06, "loss": 102.2231, "step": 34600 }, { "epoch": 0.28630516606692313, "grad_norm": 1479.285888671875, "learning_rate": 9.048398368242365e-06, "loss": 93.523, "step": 34610 }, { "epoch": 0.28638788931629233, "grad_norm": 845.21826171875, "learning_rate": 9.047570502154471e-06, "loss": 97.2673, "step": 34620 }, { "epoch": 0.2864706125656616, "grad_norm": 1318.1756591796875, "learning_rate": 9.046742314019968e-06, "loss": 136.1264, "step": 34630 }, { "epoch": 0.28655333581503084, "grad_norm": 1689.4859619140625, "learning_rate": 9.045913803904748e-06, "loss": 127.8525, "step": 34640 }, { "epoch": 0.28663605906440004, "grad_norm": 759.2603149414062, "learning_rate": 9.045084971874738e-06, "loss": 119.6263, "step": 34650 }, { "epoch": 0.2867187823137693, "grad_norm": 1472.703857421875, "learning_rate": 9.04425581799588e-06, "loss": 122.0518, "step": 34660 }, { "epoch": 0.2868015055631385, "grad_norm": 903.3499755859375, "learning_rate": 9.043426342334147e-06, "loss": 104.4996, "step": 34670 }, { "epoch": 0.28688422881250775, "grad_norm": 726.6737670898438, "learning_rate": 9.042596544955538e-06, "loss": 93.7296, "step": 34680 }, { "epoch": 0.286966952061877, "grad_norm": 1467.8472900390625, "learning_rate": 9.041766425926073e-06, "loss": 109.8673, "step": 34690 }, { "epoch": 0.2870496753112462, "grad_norm": 1312.5286865234375, "learning_rate": 9.040935985311804e-06, "loss": 83.5798, "step": 34700 }, { "epoch": 0.28713239856061545, "grad_norm": 1040.613525390625, "learning_rate": 9.040105223178803e-06, "loss": 105.7686, "step": 34710 }, { "epoch": 0.2872151218099847, "grad_norm": 737.3902587890625, "learning_rate": 9.039274139593173e-06, "loss": 100.6217, "step": 34720 }, { "epoch": 0.2872978450593539, "grad_norm": 1900.810302734375, "learning_rate": 9.038442734621034e-06, "loss": 108.4988, "step": 34730 }, { "epoch": 0.28738056830872316, "grad_norm": 823.1213989257812, "learning_rate": 9.037611008328544e-06, "loss": 87.0847, "step": 34740 }, { "epoch": 0.2874632915580924, "grad_norm": 1027.710693359375, "learning_rate": 9.036778960781874e-06, "loss": 96.1623, "step": 34750 }, { "epoch": 0.2875460148074616, "grad_norm": 1012.5924682617188, "learning_rate": 9.03594659204723e-06, "loss": 99.4886, "step": 34760 }, { "epoch": 0.28762873805683087, "grad_norm": 1981.74609375, "learning_rate": 9.035113902190838e-06, "loss": 131.8146, "step": 34770 }, { "epoch": 0.2877114613062001, "grad_norm": 838.7272338867188, "learning_rate": 9.03428089127895e-06, "loss": 138.8772, "step": 34780 }, { "epoch": 0.2877941845555693, "grad_norm": 614.944091796875, "learning_rate": 9.033447559377847e-06, "loss": 102.8104, "step": 34790 }, { "epoch": 0.2878769078049386, "grad_norm": 1577.7645263671875, "learning_rate": 9.032613906553833e-06, "loss": 128.1048, "step": 34800 }, { "epoch": 0.28795963105430783, "grad_norm": 1021.2303466796875, "learning_rate": 9.031779932873238e-06, "loss": 107.6142, "step": 34810 }, { "epoch": 0.28804235430367703, "grad_norm": 644.7843017578125, "learning_rate": 9.030945638402415e-06, "loss": 103.4213, "step": 34820 }, { "epoch": 0.2881250775530463, "grad_norm": 873.067138671875, "learning_rate": 9.030111023207751e-06, "loss": 93.0126, "step": 34830 }, { "epoch": 0.28820780080241554, "grad_norm": 1150.903564453125, "learning_rate": 9.029276087355646e-06, "loss": 121.5422, "step": 34840 }, { "epoch": 0.28829052405178474, "grad_norm": 573.4194946289062, "learning_rate": 9.028440830912536e-06, "loss": 120.833, "step": 34850 }, { "epoch": 0.288373247301154, "grad_norm": 951.9305419921875, "learning_rate": 9.027605253944874e-06, "loss": 146.5991, "step": 34860 }, { "epoch": 0.28845597055052324, "grad_norm": 646.8169555664062, "learning_rate": 9.026769356519149e-06, "loss": 73.8264, "step": 34870 }, { "epoch": 0.28853869379989244, "grad_norm": 2599.574462890625, "learning_rate": 9.025933138701865e-06, "loss": 121.7481, "step": 34880 }, { "epoch": 0.2886214170492617, "grad_norm": 964.8515625, "learning_rate": 9.02509660055956e-06, "loss": 98.844, "step": 34890 }, { "epoch": 0.28870414029863095, "grad_norm": 665.4266357421875, "learning_rate": 9.02425974215879e-06, "loss": 126.3558, "step": 34900 }, { "epoch": 0.28878686354800015, "grad_norm": 747.7141723632812, "learning_rate": 9.02342256356614e-06, "loss": 94.1504, "step": 34910 }, { "epoch": 0.2888695867973694, "grad_norm": 1255.60302734375, "learning_rate": 9.022585064848222e-06, "loss": 101.1276, "step": 34920 }, { "epoch": 0.28895231004673866, "grad_norm": 1393.7789306640625, "learning_rate": 9.021747246071673e-06, "loss": 106.2699, "step": 34930 }, { "epoch": 0.28903503329610786, "grad_norm": 860.56689453125, "learning_rate": 9.020909107303152e-06, "loss": 124.7073, "step": 34940 }, { "epoch": 0.2891177565454771, "grad_norm": 982.3703002929688, "learning_rate": 9.020070648609347e-06, "loss": 112.9233, "step": 34950 }, { "epoch": 0.28920047979484637, "grad_norm": 1855.6322021484375, "learning_rate": 9.01923187005697e-06, "loss": 108.5488, "step": 34960 }, { "epoch": 0.28928320304421556, "grad_norm": 904.826416015625, "learning_rate": 9.018392771712758e-06, "loss": 125.191, "step": 34970 }, { "epoch": 0.2893659262935848, "grad_norm": 704.2764892578125, "learning_rate": 9.017553353643479e-06, "loss": 126.2956, "step": 34980 }, { "epoch": 0.2894486495429541, "grad_norm": 956.4945678710938, "learning_rate": 9.016713615915913e-06, "loss": 81.5747, "step": 34990 }, { "epoch": 0.28953137279232327, "grad_norm": 783.7576293945312, "learning_rate": 9.01587355859688e-06, "loss": 92.5522, "step": 35000 }, { "epoch": 0.2896140960416925, "grad_norm": 1208.607421875, "learning_rate": 9.015033181753219e-06, "loss": 106.08, "step": 35010 }, { "epoch": 0.2896968192910618, "grad_norm": 852.6314697265625, "learning_rate": 9.014192485451794e-06, "loss": 116.7134, "step": 35020 }, { "epoch": 0.289779542540431, "grad_norm": 1022.0242309570312, "learning_rate": 9.013351469759497e-06, "loss": 144.6814, "step": 35030 }, { "epoch": 0.28986226578980023, "grad_norm": 1243.4373779296875, "learning_rate": 9.01251013474324e-06, "loss": 104.1561, "step": 35040 }, { "epoch": 0.28994498903916943, "grad_norm": 960.918701171875, "learning_rate": 9.011668480469969e-06, "loss": 105.9182, "step": 35050 }, { "epoch": 0.2900277122885387, "grad_norm": 601.8880004882812, "learning_rate": 9.010826507006644e-06, "loss": 94.2774, "step": 35060 }, { "epoch": 0.29011043553790794, "grad_norm": 642.360107421875, "learning_rate": 9.009984214420265e-06, "loss": 142.8863, "step": 35070 }, { "epoch": 0.29019315878727714, "grad_norm": 1095.9666748046875, "learning_rate": 9.009141602777845e-06, "loss": 128.7609, "step": 35080 }, { "epoch": 0.2902758820366464, "grad_norm": 1230.556396484375, "learning_rate": 9.008298672146425e-06, "loss": 111.0792, "step": 35090 }, { "epoch": 0.29035860528601565, "grad_norm": 1034.6533203125, "learning_rate": 9.007455422593077e-06, "loss": 111.8609, "step": 35100 }, { "epoch": 0.29044132853538485, "grad_norm": 509.1571960449219, "learning_rate": 9.006611854184893e-06, "loss": 88.8575, "step": 35110 }, { "epoch": 0.2905240517847541, "grad_norm": 832.6845703125, "learning_rate": 9.00576796698899e-06, "loss": 113.1765, "step": 35120 }, { "epoch": 0.29060677503412335, "grad_norm": 1436.23388671875, "learning_rate": 9.004923761072515e-06, "loss": 103.5024, "step": 35130 }, { "epoch": 0.29068949828349255, "grad_norm": 1610.103759765625, "learning_rate": 9.004079236502636e-06, "loss": 113.4215, "step": 35140 }, { "epoch": 0.2907722215328618, "grad_norm": 881.2527465820312, "learning_rate": 9.00323439334655e-06, "loss": 123.1393, "step": 35150 }, { "epoch": 0.29085494478223106, "grad_norm": 734.708740234375, "learning_rate": 9.002389231671474e-06, "loss": 121.4382, "step": 35160 }, { "epoch": 0.29093766803160026, "grad_norm": 787.5501708984375, "learning_rate": 9.001543751544654e-06, "loss": 107.295, "step": 35170 }, { "epoch": 0.2910203912809695, "grad_norm": 1131.2176513671875, "learning_rate": 9.000697953033364e-06, "loss": 107.077, "step": 35180 }, { "epoch": 0.29110311453033877, "grad_norm": 1214.8603515625, "learning_rate": 8.999851836204901e-06, "loss": 103.6586, "step": 35190 }, { "epoch": 0.29118583777970797, "grad_norm": 895.4462280273438, "learning_rate": 8.99900540112658e-06, "loss": 83.9514, "step": 35200 }, { "epoch": 0.2912685610290772, "grad_norm": 543.6385498046875, "learning_rate": 8.998158647865753e-06, "loss": 107.0998, "step": 35210 }, { "epoch": 0.2913512842784465, "grad_norm": 1422.5147705078125, "learning_rate": 8.997311576489793e-06, "loss": 127.3802, "step": 35220 }, { "epoch": 0.2914340075278157, "grad_norm": 1031.476806640625, "learning_rate": 8.996464187066096e-06, "loss": 132.6045, "step": 35230 }, { "epoch": 0.29151673077718493, "grad_norm": 962.57568359375, "learning_rate": 8.995616479662084e-06, "loss": 76.9806, "step": 35240 }, { "epoch": 0.2915994540265542, "grad_norm": 888.2576293945312, "learning_rate": 8.994768454345207e-06, "loss": 88.927, "step": 35250 }, { "epoch": 0.2916821772759234, "grad_norm": 991.271484375, "learning_rate": 8.993920111182937e-06, "loss": 116.7842, "step": 35260 }, { "epoch": 0.29176490052529264, "grad_norm": 2302.41064453125, "learning_rate": 8.993071450242775e-06, "loss": 99.9801, "step": 35270 }, { "epoch": 0.2918476237746619, "grad_norm": 729.2933349609375, "learning_rate": 8.99222247159224e-06, "loss": 108.2754, "step": 35280 }, { "epoch": 0.2919303470240311, "grad_norm": 940.2220458984375, "learning_rate": 8.991373175298887e-06, "loss": 104.497, "step": 35290 }, { "epoch": 0.29201307027340034, "grad_norm": 893.2850952148438, "learning_rate": 8.99052356143029e-06, "loss": 104.8119, "step": 35300 }, { "epoch": 0.2920957935227696, "grad_norm": 1792.8408203125, "learning_rate": 8.989673630054044e-06, "loss": 134.8155, "step": 35310 }, { "epoch": 0.2921785167721388, "grad_norm": 823.2114868164062, "learning_rate": 8.988823381237778e-06, "loss": 91.5063, "step": 35320 }, { "epoch": 0.29226124002150805, "grad_norm": 1062.8973388671875, "learning_rate": 8.987972815049144e-06, "loss": 130.8569, "step": 35330 }, { "epoch": 0.2923439632708773, "grad_norm": 676.7745361328125, "learning_rate": 8.987121931555814e-06, "loss": 124.0575, "step": 35340 }, { "epoch": 0.2924266865202465, "grad_norm": 1002.80029296875, "learning_rate": 8.986270730825489e-06, "loss": 101.8287, "step": 35350 }, { "epoch": 0.29250940976961576, "grad_norm": 1145.8223876953125, "learning_rate": 8.985419212925898e-06, "loss": 110.8668, "step": 35360 }, { "epoch": 0.292592133018985, "grad_norm": 992.4735717773438, "learning_rate": 8.98456737792479e-06, "loss": 80.9995, "step": 35370 }, { "epoch": 0.2926748562683542, "grad_norm": 488.52618408203125, "learning_rate": 8.983715225889942e-06, "loss": 111.5234, "step": 35380 }, { "epoch": 0.29275757951772347, "grad_norm": 1136.89794921875, "learning_rate": 8.982862756889158e-06, "loss": 162.2252, "step": 35390 }, { "epoch": 0.29284030276709266, "grad_norm": 1113.3115234375, "learning_rate": 8.982009970990262e-06, "loss": 89.9033, "step": 35400 }, { "epoch": 0.2929230260164619, "grad_norm": 1392.5189208984375, "learning_rate": 8.98115686826111e-06, "loss": 131.7283, "step": 35410 }, { "epoch": 0.2930057492658312, "grad_norm": 617.0336303710938, "learning_rate": 8.980303448769574e-06, "loss": 99.9001, "step": 35420 }, { "epoch": 0.29308847251520037, "grad_norm": 2442.5810546875, "learning_rate": 8.979449712583562e-06, "loss": 112.8064, "step": 35430 }, { "epoch": 0.2931711957645696, "grad_norm": 544.6372680664062, "learning_rate": 8.978595659770997e-06, "loss": 109.6494, "step": 35440 }, { "epoch": 0.2932539190139389, "grad_norm": 860.6021118164062, "learning_rate": 8.977741290399836e-06, "loss": 106.3515, "step": 35450 }, { "epoch": 0.2933366422633081, "grad_norm": 1385.670654296875, "learning_rate": 8.976886604538055e-06, "loss": 117.2203, "step": 35460 }, { "epoch": 0.29341936551267733, "grad_norm": 653.8483276367188, "learning_rate": 8.976031602253661e-06, "loss": 91.5749, "step": 35470 }, { "epoch": 0.2935020887620466, "grad_norm": 707.4151000976562, "learning_rate": 8.975176283614677e-06, "loss": 137.304, "step": 35480 }, { "epoch": 0.2935848120114158, "grad_norm": 724.526123046875, "learning_rate": 8.97432064868916e-06, "loss": 117.3225, "step": 35490 }, { "epoch": 0.29366753526078504, "grad_norm": 684.9421997070312, "learning_rate": 8.973464697545191e-06, "loss": 118.9565, "step": 35500 }, { "epoch": 0.2937502585101543, "grad_norm": 1034.4615478515625, "learning_rate": 8.97260843025087e-06, "loss": 117.1274, "step": 35510 }, { "epoch": 0.2938329817595235, "grad_norm": 808.4039916992188, "learning_rate": 8.971751846874329e-06, "loss": 109.1277, "step": 35520 }, { "epoch": 0.29391570500889275, "grad_norm": 793.2233276367188, "learning_rate": 8.97089494748372e-06, "loss": 150.5428, "step": 35530 }, { "epoch": 0.293998428258262, "grad_norm": 1373.2613525390625, "learning_rate": 8.970037732147226e-06, "loss": 144.7835, "step": 35540 }, { "epoch": 0.2940811515076312, "grad_norm": 876.5359497070312, "learning_rate": 8.969180200933048e-06, "loss": 175.0752, "step": 35550 }, { "epoch": 0.29416387475700045, "grad_norm": 1215.04443359375, "learning_rate": 8.968322353909417e-06, "loss": 105.4971, "step": 35560 }, { "epoch": 0.2942465980063697, "grad_norm": 1001.4893188476562, "learning_rate": 8.96746419114459e-06, "loss": 109.2281, "step": 35570 }, { "epoch": 0.2943293212557389, "grad_norm": 1082.677001953125, "learning_rate": 8.966605712706844e-06, "loss": 90.3954, "step": 35580 }, { "epoch": 0.29441204450510816, "grad_norm": 962.15380859375, "learning_rate": 8.965746918664486e-06, "loss": 96.3644, "step": 35590 }, { "epoch": 0.2944947677544774, "grad_norm": 1004.3434448242188, "learning_rate": 8.964887809085846e-06, "loss": 128.4367, "step": 35600 }, { "epoch": 0.2945774910038466, "grad_norm": 535.0782470703125, "learning_rate": 8.96402838403928e-06, "loss": 101.0775, "step": 35610 }, { "epoch": 0.29466021425321587, "grad_norm": 451.95941162109375, "learning_rate": 8.96316864359317e-06, "loss": 103.9834, "step": 35620 }, { "epoch": 0.2947429375025851, "grad_norm": 861.1315307617188, "learning_rate": 8.962308587815916e-06, "loss": 95.3887, "step": 35630 }, { "epoch": 0.2948256607519543, "grad_norm": 873.6254272460938, "learning_rate": 8.961448216775955e-06, "loss": 106.6663, "step": 35640 }, { "epoch": 0.2949083840013236, "grad_norm": 647.5992431640625, "learning_rate": 8.960587530541737e-06, "loss": 113.9285, "step": 35650 }, { "epoch": 0.29499110725069283, "grad_norm": 818.12744140625, "learning_rate": 8.959726529181748e-06, "loss": 93.2586, "step": 35660 }, { "epoch": 0.29507383050006203, "grad_norm": 2233.892578125, "learning_rate": 8.95886521276449e-06, "loss": 99.4201, "step": 35670 }, { "epoch": 0.2951565537494313, "grad_norm": 1236.3218994140625, "learning_rate": 8.958003581358498e-06, "loss": 122.5037, "step": 35680 }, { "epoch": 0.29523927699880054, "grad_norm": 1387.99462890625, "learning_rate": 8.957141635032325e-06, "loss": 103.0061, "step": 35690 }, { "epoch": 0.29532200024816974, "grad_norm": 363.5679626464844, "learning_rate": 8.956279373854553e-06, "loss": 297.6621, "step": 35700 }, { "epoch": 0.295404723497539, "grad_norm": 848.06884765625, "learning_rate": 8.955416797893787e-06, "loss": 133.0075, "step": 35710 }, { "epoch": 0.29548744674690824, "grad_norm": 767.8993530273438, "learning_rate": 8.95455390721866e-06, "loss": 86.9913, "step": 35720 }, { "epoch": 0.29557016999627744, "grad_norm": 947.8824462890625, "learning_rate": 8.953690701897827e-06, "loss": 126.0984, "step": 35730 }, { "epoch": 0.2956528932456467, "grad_norm": 866.94970703125, "learning_rate": 8.952827181999973e-06, "loss": 100.0804, "step": 35740 }, { "epoch": 0.29573561649501595, "grad_norm": 997.9763793945312, "learning_rate": 8.951963347593797e-06, "loss": 158.5358, "step": 35750 }, { "epoch": 0.29581833974438515, "grad_norm": 682.8516235351562, "learning_rate": 8.951099198748036e-06, "loss": 101.2486, "step": 35760 }, { "epoch": 0.2959010629937544, "grad_norm": 804.0764770507812, "learning_rate": 8.950234735531445e-06, "loss": 98.5626, "step": 35770 }, { "epoch": 0.2959837862431236, "grad_norm": 10388.48046875, "learning_rate": 8.949369958012806e-06, "loss": 155.4089, "step": 35780 }, { "epoch": 0.29606650949249286, "grad_norm": 886.81298828125, "learning_rate": 8.948504866260924e-06, "loss": 113.0202, "step": 35790 }, { "epoch": 0.2961492327418621, "grad_norm": 897.130859375, "learning_rate": 8.94763946034463e-06, "loss": 142.2092, "step": 35800 }, { "epoch": 0.2962319559912313, "grad_norm": 966.3784790039062, "learning_rate": 8.946773740332781e-06, "loss": 157.32, "step": 35810 }, { "epoch": 0.29631467924060056, "grad_norm": 1265.8228759765625, "learning_rate": 8.945907706294262e-06, "loss": 87.6832, "step": 35820 }, { "epoch": 0.2963974024899698, "grad_norm": 434.8976135253906, "learning_rate": 8.945041358297973e-06, "loss": 115.3741, "step": 35830 }, { "epoch": 0.296480125739339, "grad_norm": 1037.9794921875, "learning_rate": 8.94417469641285e-06, "loss": 101.735, "step": 35840 }, { "epoch": 0.29656284898870827, "grad_norm": 1543.5999755859375, "learning_rate": 8.943307720707846e-06, "loss": 149.339, "step": 35850 }, { "epoch": 0.2966455722380775, "grad_norm": 851.0203247070312, "learning_rate": 8.942440431251947e-06, "loss": 126.8035, "step": 35860 }, { "epoch": 0.2967282954874467, "grad_norm": 1087.814697265625, "learning_rate": 8.941572828114154e-06, "loss": 154.3589, "step": 35870 }, { "epoch": 0.296811018736816, "grad_norm": 1085.237548828125, "learning_rate": 8.9407049113635e-06, "loss": 133.6983, "step": 35880 }, { "epoch": 0.29689374198618523, "grad_norm": 657.667724609375, "learning_rate": 8.939836681069042e-06, "loss": 77.88, "step": 35890 }, { "epoch": 0.29697646523555443, "grad_norm": 937.2635498046875, "learning_rate": 8.938968137299861e-06, "loss": 121.5767, "step": 35900 }, { "epoch": 0.2970591884849237, "grad_norm": 692.5191650390625, "learning_rate": 8.938099280125064e-06, "loss": 110.6443, "step": 35910 }, { "epoch": 0.29714191173429294, "grad_norm": 1191.0335693359375, "learning_rate": 8.937230109613778e-06, "loss": 125.6926, "step": 35920 }, { "epoch": 0.29722463498366214, "grad_norm": 600.857177734375, "learning_rate": 8.936360625835164e-06, "loss": 114.9589, "step": 35930 }, { "epoch": 0.2973073582330314, "grad_norm": 786.0075073242188, "learning_rate": 8.935490828858399e-06, "loss": 156.1116, "step": 35940 }, { "epoch": 0.29739008148240065, "grad_norm": 923.1871948242188, "learning_rate": 8.934620718752691e-06, "loss": 102.9856, "step": 35950 }, { "epoch": 0.29747280473176985, "grad_norm": 785.8792114257812, "learning_rate": 8.933750295587269e-06, "loss": 121.2862, "step": 35960 }, { "epoch": 0.2975555279811391, "grad_norm": 1262.131591796875, "learning_rate": 8.932879559431392e-06, "loss": 144.9996, "step": 35970 }, { "epoch": 0.29763825123050835, "grad_norm": 1094.29296875, "learning_rate": 8.932008510354336e-06, "loss": 99.4907, "step": 35980 }, { "epoch": 0.29772097447987755, "grad_norm": 1515.66748046875, "learning_rate": 8.931137148425407e-06, "loss": 117.0325, "step": 35990 }, { "epoch": 0.2978036977292468, "grad_norm": 702.0181274414062, "learning_rate": 8.930265473713939e-06, "loss": 89.8551, "step": 36000 }, { "epoch": 0.29788642097861606, "grad_norm": 924.1445922851562, "learning_rate": 8.929393486289283e-06, "loss": 91.7574, "step": 36010 }, { "epoch": 0.29796914422798526, "grad_norm": 886.2630615234375, "learning_rate": 8.928521186220822e-06, "loss": 134.8864, "step": 36020 }, { "epoch": 0.2980518674773545, "grad_norm": 571.9080200195312, "learning_rate": 8.92764857357796e-06, "loss": 166.6288, "step": 36030 }, { "epoch": 0.29813459072672377, "grad_norm": 1017.0745239257812, "learning_rate": 8.926775648430124e-06, "loss": 97.3446, "step": 36040 }, { "epoch": 0.29821731397609297, "grad_norm": 1384.15869140625, "learning_rate": 8.925902410846774e-06, "loss": 102.3454, "step": 36050 }, { "epoch": 0.2983000372254622, "grad_norm": 627.1011962890625, "learning_rate": 8.925028860897384e-06, "loss": 99.8053, "step": 36060 }, { "epoch": 0.2983827604748315, "grad_norm": 1164.1707763671875, "learning_rate": 8.924154998651461e-06, "loss": 150.2465, "step": 36070 }, { "epoch": 0.2984654837242007, "grad_norm": 806.7046508789062, "learning_rate": 8.923280824178538e-06, "loss": 127.2189, "step": 36080 }, { "epoch": 0.29854820697356993, "grad_norm": 936.9692993164062, "learning_rate": 8.922406337548162e-06, "loss": 106.9401, "step": 36090 }, { "epoch": 0.2986309302229392, "grad_norm": 861.8311157226562, "learning_rate": 8.921531538829917e-06, "loss": 92.577, "step": 36100 }, { "epoch": 0.2987136534723084, "grad_norm": 901.859375, "learning_rate": 8.920656428093403e-06, "loss": 83.8378, "step": 36110 }, { "epoch": 0.29879637672167764, "grad_norm": 1293.734375, "learning_rate": 8.919781005408251e-06, "loss": 114.6592, "step": 36120 }, { "epoch": 0.29887909997104684, "grad_norm": 697.5694580078125, "learning_rate": 8.918905270844113e-06, "loss": 141.8754, "step": 36130 }, { "epoch": 0.2989618232204161, "grad_norm": 822.4385986328125, "learning_rate": 8.918029224470671e-06, "loss": 101.8231, "step": 36140 }, { "epoch": 0.29904454646978534, "grad_norm": 886.5821533203125, "learning_rate": 8.917152866357621e-06, "loss": 97.08, "step": 36150 }, { "epoch": 0.29912726971915454, "grad_norm": 1727.11279296875, "learning_rate": 8.916276196574698e-06, "loss": 124.1994, "step": 36160 }, { "epoch": 0.2992099929685238, "grad_norm": 649.4053955078125, "learning_rate": 8.91539921519165e-06, "loss": 113.0979, "step": 36170 }, { "epoch": 0.29929271621789305, "grad_norm": 762.2130737304688, "learning_rate": 8.914521922278255e-06, "loss": 118.0666, "step": 36180 }, { "epoch": 0.29937543946726225, "grad_norm": 995.956787109375, "learning_rate": 8.913644317904317e-06, "loss": 125.7407, "step": 36190 }, { "epoch": 0.2994581627166315, "grad_norm": 1194.2001953125, "learning_rate": 8.912766402139662e-06, "loss": 124.1992, "step": 36200 }, { "epoch": 0.29954088596600076, "grad_norm": 1277.1484375, "learning_rate": 8.91188817505414e-06, "loss": 146.9492, "step": 36210 }, { "epoch": 0.29962360921536996, "grad_norm": 992.379638671875, "learning_rate": 8.91100963671763e-06, "loss": 107.3992, "step": 36220 }, { "epoch": 0.2997063324647392, "grad_norm": 820.5222778320312, "learning_rate": 8.910130787200032e-06, "loss": 93.3464, "step": 36230 }, { "epoch": 0.29978905571410847, "grad_norm": 2073.3251953125, "learning_rate": 8.909251626571273e-06, "loss": 101.3619, "step": 36240 }, { "epoch": 0.29987177896347766, "grad_norm": 767.80615234375, "learning_rate": 8.908372154901302e-06, "loss": 89.5982, "step": 36250 }, { "epoch": 0.2999545022128469, "grad_norm": 1339.1114501953125, "learning_rate": 8.907492372260096e-06, "loss": 118.8273, "step": 36260 }, { "epoch": 0.3000372254622162, "grad_norm": 1395.037353515625, "learning_rate": 8.906612278717657e-06, "loss": 114.5038, "step": 36270 }, { "epoch": 0.30011994871158537, "grad_norm": 1279.881591796875, "learning_rate": 8.905731874344005e-06, "loss": 110.8277, "step": 36280 }, { "epoch": 0.3002026719609546, "grad_norm": 1153.6810302734375, "learning_rate": 8.904851159209193e-06, "loss": 111.3379, "step": 36290 }, { "epoch": 0.3002853952103239, "grad_norm": 910.88623046875, "learning_rate": 8.903970133383297e-06, "loss": 83.3806, "step": 36300 }, { "epoch": 0.3003681184596931, "grad_norm": 993.2054443359375, "learning_rate": 8.903088796936414e-06, "loss": 117.134, "step": 36310 }, { "epoch": 0.30045084170906233, "grad_norm": 1444.7095947265625, "learning_rate": 8.902207149938667e-06, "loss": 118.84, "step": 36320 }, { "epoch": 0.3005335649584316, "grad_norm": 722.3483276367188, "learning_rate": 8.901325192460206e-06, "loss": 100.5878, "step": 36330 }, { "epoch": 0.3006162882078008, "grad_norm": 2218.97119140625, "learning_rate": 8.900442924571204e-06, "loss": 125.5503, "step": 36340 }, { "epoch": 0.30069901145717004, "grad_norm": 933.83984375, "learning_rate": 8.89956034634186e-06, "loss": 100.4451, "step": 36350 }, { "epoch": 0.3007817347065393, "grad_norm": 1003.083251953125, "learning_rate": 8.898677457842394e-06, "loss": 97.8874, "step": 36360 }, { "epoch": 0.3008644579559085, "grad_norm": 1543.9967041015625, "learning_rate": 8.897794259143057e-06, "loss": 144.2935, "step": 36370 }, { "epoch": 0.30094718120527775, "grad_norm": 1334.9381103515625, "learning_rate": 8.896910750314118e-06, "loss": 91.8307, "step": 36380 }, { "epoch": 0.301029904454647, "grad_norm": 1186.411865234375, "learning_rate": 8.896026931425876e-06, "loss": 131.1232, "step": 36390 }, { "epoch": 0.3011126277040162, "grad_norm": 1868.9034423828125, "learning_rate": 8.895142802548653e-06, "loss": 124.4849, "step": 36400 }, { "epoch": 0.30119535095338545, "grad_norm": 840.4161987304688, "learning_rate": 8.89425836375279e-06, "loss": 81.0873, "step": 36410 }, { "epoch": 0.3012780742027547, "grad_norm": 1163.7984619140625, "learning_rate": 8.893373615108663e-06, "loss": 136.477, "step": 36420 }, { "epoch": 0.3013607974521239, "grad_norm": 1086.72216796875, "learning_rate": 8.892488556686665e-06, "loss": 126.1113, "step": 36430 }, { "epoch": 0.30144352070149316, "grad_norm": 879.4309692382812, "learning_rate": 8.891603188557218e-06, "loss": 96.8926, "step": 36440 }, { "epoch": 0.3015262439508624, "grad_norm": 1197.5047607421875, "learning_rate": 8.890717510790763e-06, "loss": 110.1916, "step": 36450 }, { "epoch": 0.3016089672002316, "grad_norm": 349.7181701660156, "learning_rate": 8.889831523457773e-06, "loss": 154.8354, "step": 36460 }, { "epoch": 0.30169169044960087, "grad_norm": 1460.5762939453125, "learning_rate": 8.888945226628742e-06, "loss": 140.0803, "step": 36470 }, { "epoch": 0.3017744136989701, "grad_norm": 665.8645629882812, "learning_rate": 8.888058620374185e-06, "loss": 92.3529, "step": 36480 }, { "epoch": 0.3018571369483393, "grad_norm": 879.9156494140625, "learning_rate": 8.887171704764647e-06, "loss": 129.1003, "step": 36490 }, { "epoch": 0.3019398601977086, "grad_norm": 815.3402709960938, "learning_rate": 8.8862844798707e-06, "loss": 118.2584, "step": 36500 }, { "epoch": 0.3020225834470778, "grad_norm": 1314.0321044921875, "learning_rate": 8.885396945762928e-06, "loss": 126.3761, "step": 36510 }, { "epoch": 0.30210530669644703, "grad_norm": 858.8338012695312, "learning_rate": 8.884509102511956e-06, "loss": 98.5212, "step": 36520 }, { "epoch": 0.3021880299458163, "grad_norm": 1700.220458984375, "learning_rate": 8.883620950188422e-06, "loss": 106.6714, "step": 36530 }, { "epoch": 0.3022707531951855, "grad_norm": 696.0690307617188, "learning_rate": 8.882732488862988e-06, "loss": 118.2453, "step": 36540 }, { "epoch": 0.30235347644455474, "grad_norm": 793.1904907226562, "learning_rate": 8.881843718606353e-06, "loss": 125.0236, "step": 36550 }, { "epoch": 0.302436199693924, "grad_norm": 970.5441284179688, "learning_rate": 8.880954639489227e-06, "loss": 108.3671, "step": 36560 }, { "epoch": 0.3025189229432932, "grad_norm": 1777.4910888671875, "learning_rate": 8.880065251582354e-06, "loss": 160.0988, "step": 36570 }, { "epoch": 0.30260164619266244, "grad_norm": 1037.1177978515625, "learning_rate": 8.879175554956495e-06, "loss": 104.0029, "step": 36580 }, { "epoch": 0.3026843694420317, "grad_norm": 806.9476928710938, "learning_rate": 8.87828554968244e-06, "loss": 88.7504, "step": 36590 }, { "epoch": 0.3027670926914009, "grad_norm": 1243.7362060546875, "learning_rate": 8.877395235831002e-06, "loss": 111.0507, "step": 36600 }, { "epoch": 0.30284981594077015, "grad_norm": 1789.92529296875, "learning_rate": 8.876504613473019e-06, "loss": 154.0786, "step": 36610 }, { "epoch": 0.3029325391901394, "grad_norm": 718.7664184570312, "learning_rate": 8.875613682679356e-06, "loss": 108.9329, "step": 36620 }, { "epoch": 0.3030152624395086, "grad_norm": 748.9815063476562, "learning_rate": 8.874722443520898e-06, "loss": 123.7988, "step": 36630 }, { "epoch": 0.30309798568887786, "grad_norm": 606.9013671875, "learning_rate": 8.873830896068559e-06, "loss": 107.3505, "step": 36640 }, { "epoch": 0.3031807089382471, "grad_norm": 878.5547485351562, "learning_rate": 8.872939040393274e-06, "loss": 113.0779, "step": 36650 }, { "epoch": 0.3032634321876163, "grad_norm": 1300.362060546875, "learning_rate": 8.872046876566003e-06, "loss": 130.3682, "step": 36660 }, { "epoch": 0.30334615543698557, "grad_norm": 886.26025390625, "learning_rate": 8.871154404657734e-06, "loss": 106.1408, "step": 36670 }, { "epoch": 0.3034288786863548, "grad_norm": 1735.70947265625, "learning_rate": 8.870261624739474e-06, "loss": 120.6958, "step": 36680 }, { "epoch": 0.303511601935724, "grad_norm": 886.688232421875, "learning_rate": 8.869368536882258e-06, "loss": 102.5698, "step": 36690 }, { "epoch": 0.3035943251850933, "grad_norm": 632.3603515625, "learning_rate": 8.868475141157146e-06, "loss": 88.5606, "step": 36700 }, { "epoch": 0.3036770484344625, "grad_norm": 764.1319580078125, "learning_rate": 8.867581437635221e-06, "loss": 107.0108, "step": 36710 }, { "epoch": 0.3037597716838317, "grad_norm": 1273.7620849609375, "learning_rate": 8.866687426387592e-06, "loss": 159.5809, "step": 36720 }, { "epoch": 0.303842494933201, "grad_norm": 685.6810302734375, "learning_rate": 8.86579310748539e-06, "loss": 99.7657, "step": 36730 }, { "epoch": 0.30392521818257023, "grad_norm": 777.0698852539062, "learning_rate": 8.86489848099977e-06, "loss": 120.2746, "step": 36740 }, { "epoch": 0.30400794143193943, "grad_norm": 746.861572265625, "learning_rate": 8.864003547001916e-06, "loss": 106.8211, "step": 36750 }, { "epoch": 0.3040906646813087, "grad_norm": 853.1625366210938, "learning_rate": 8.863108305563035e-06, "loss": 91.7284, "step": 36760 }, { "epoch": 0.30417338793067794, "grad_norm": 1231.1922607421875, "learning_rate": 8.862212756754354e-06, "loss": 155.4766, "step": 36770 }, { "epoch": 0.30425611118004714, "grad_norm": 1215.257568359375, "learning_rate": 8.861316900647129e-06, "loss": 149.8827, "step": 36780 }, { "epoch": 0.3043388344294164, "grad_norm": 3567.228515625, "learning_rate": 8.860420737312638e-06, "loss": 121.5637, "step": 36790 }, { "epoch": 0.30442155767878565, "grad_norm": 694.6704711914062, "learning_rate": 8.859524266822188e-06, "loss": 101.825, "step": 36800 }, { "epoch": 0.30450428092815485, "grad_norm": 1621.055908203125, "learning_rate": 8.858627489247105e-06, "loss": 128.4847, "step": 36810 }, { "epoch": 0.3045870041775241, "grad_norm": 974.5038452148438, "learning_rate": 8.85773040465874e-06, "loss": 157.9853, "step": 36820 }, { "epoch": 0.30466972742689336, "grad_norm": 1332.6234130859375, "learning_rate": 8.856833013128472e-06, "loss": 107.964, "step": 36830 }, { "epoch": 0.30475245067626255, "grad_norm": 1167.7923583984375, "learning_rate": 8.855935314727702e-06, "loss": 109.8553, "step": 36840 }, { "epoch": 0.3048351739256318, "grad_norm": 704.8397216796875, "learning_rate": 8.855037309527854e-06, "loss": 115.8193, "step": 36850 }, { "epoch": 0.304917897175001, "grad_norm": 883.3186645507812, "learning_rate": 8.854138997600382e-06, "loss": 91.2245, "step": 36860 }, { "epoch": 0.30500062042437026, "grad_norm": 660.802001953125, "learning_rate": 8.853240379016757e-06, "loss": 88.3629, "step": 36870 }, { "epoch": 0.3050833436737395, "grad_norm": 1194.5704345703125, "learning_rate": 8.852341453848477e-06, "loss": 113.7338, "step": 36880 }, { "epoch": 0.3051660669231087, "grad_norm": 1196.6671142578125, "learning_rate": 8.851442222167068e-06, "loss": 96.392, "step": 36890 }, { "epoch": 0.30524879017247797, "grad_norm": 1096.315673828125, "learning_rate": 8.850542684044078e-06, "loss": 132.9981, "step": 36900 }, { "epoch": 0.3053315134218472, "grad_norm": 843.752685546875, "learning_rate": 8.849642839551079e-06, "loss": 120.9463, "step": 36910 }, { "epoch": 0.3054142366712164, "grad_norm": 1700.408203125, "learning_rate": 8.848742688759666e-06, "loss": 106.4173, "step": 36920 }, { "epoch": 0.3054969599205857, "grad_norm": 1714.5902099609375, "learning_rate": 8.847842231741462e-06, "loss": 119.7494, "step": 36930 }, { "epoch": 0.30557968316995493, "grad_norm": 998.3058471679688, "learning_rate": 8.846941468568108e-06, "loss": 104.204, "step": 36940 }, { "epoch": 0.30566240641932413, "grad_norm": 1132.1826171875, "learning_rate": 8.846040399311278e-06, "loss": 112.0986, "step": 36950 }, { "epoch": 0.3057451296686934, "grad_norm": 661.6875, "learning_rate": 8.845139024042664e-06, "loss": 81.744, "step": 36960 }, { "epoch": 0.30582785291806264, "grad_norm": 932.6510009765625, "learning_rate": 8.844237342833985e-06, "loss": 127.1373, "step": 36970 }, { "epoch": 0.30591057616743184, "grad_norm": 780.3368530273438, "learning_rate": 8.843335355756983e-06, "loss": 75.523, "step": 36980 }, { "epoch": 0.3059932994168011, "grad_norm": 754.1832275390625, "learning_rate": 8.842433062883427e-06, "loss": 89.4337, "step": 36990 }, { "epoch": 0.30607602266617034, "grad_norm": 764.4425659179688, "learning_rate": 8.841530464285105e-06, "loss": 129.0274, "step": 37000 }, { "epoch": 0.30615874591553954, "grad_norm": 861.7986450195312, "learning_rate": 8.840627560033833e-06, "loss": 82.3673, "step": 37010 }, { "epoch": 0.3062414691649088, "grad_norm": 988.38916015625, "learning_rate": 8.839724350201452e-06, "loss": 114.8146, "step": 37020 }, { "epoch": 0.30632419241427805, "grad_norm": 530.607421875, "learning_rate": 8.838820834859829e-06, "loss": 103.93, "step": 37030 }, { "epoch": 0.30640691566364725, "grad_norm": 941.2070922851562, "learning_rate": 8.837917014080849e-06, "loss": 89.9074, "step": 37040 }, { "epoch": 0.3064896389130165, "grad_norm": 688.3717651367188, "learning_rate": 8.837012887936426e-06, "loss": 111.2304, "step": 37050 }, { "epoch": 0.30657236216238576, "grad_norm": 1092.7115478515625, "learning_rate": 8.836108456498497e-06, "loss": 93.2177, "step": 37060 }, { "epoch": 0.30665508541175496, "grad_norm": 970.1156616210938, "learning_rate": 8.835203719839024e-06, "loss": 113.6382, "step": 37070 }, { "epoch": 0.3067378086611242, "grad_norm": 1119.5540771484375, "learning_rate": 8.834298678029988e-06, "loss": 120.2044, "step": 37080 }, { "epoch": 0.30682053191049347, "grad_norm": 840.0438842773438, "learning_rate": 8.833393331143409e-06, "loss": 103.9367, "step": 37090 }, { "epoch": 0.30690325515986266, "grad_norm": 1232.6182861328125, "learning_rate": 8.832487679251311e-06, "loss": 107.9942, "step": 37100 }, { "epoch": 0.3069859784092319, "grad_norm": 702.6771850585938, "learning_rate": 8.831581722425761e-06, "loss": 107.5534, "step": 37110 }, { "epoch": 0.3070687016586012, "grad_norm": 933.55615234375, "learning_rate": 8.830675460738835e-06, "loss": 92.3703, "step": 37120 }, { "epoch": 0.30715142490797037, "grad_norm": 1156.6966552734375, "learning_rate": 8.829768894262644e-06, "loss": 118.0975, "step": 37130 }, { "epoch": 0.3072341481573396, "grad_norm": 766.1238403320312, "learning_rate": 8.82886202306932e-06, "loss": 91.8914, "step": 37140 }, { "epoch": 0.3073168714067089, "grad_norm": 1076.5308837890625, "learning_rate": 8.827954847231016e-06, "loss": 115.0902, "step": 37150 }, { "epoch": 0.3073995946560781, "grad_norm": 867.0706176757812, "learning_rate": 8.82704736681991e-06, "loss": 128.4827, "step": 37160 }, { "epoch": 0.30748231790544733, "grad_norm": 963.2921752929688, "learning_rate": 8.826139581908211e-06, "loss": 112.9323, "step": 37170 }, { "epoch": 0.3075650411548166, "grad_norm": 735.8433227539062, "learning_rate": 8.825231492568146e-06, "loss": 114.1932, "step": 37180 }, { "epoch": 0.3076477644041858, "grad_norm": 1258.63818359375, "learning_rate": 8.824323098871966e-06, "loss": 136.7632, "step": 37190 }, { "epoch": 0.30773048765355504, "grad_norm": 1213.376220703125, "learning_rate": 8.823414400891948e-06, "loss": 140.9363, "step": 37200 }, { "epoch": 0.30781321090292424, "grad_norm": 816.0035400390625, "learning_rate": 8.822505398700395e-06, "loss": 97.0494, "step": 37210 }, { "epoch": 0.3078959341522935, "grad_norm": 908.9609375, "learning_rate": 8.821596092369627e-06, "loss": 112.1852, "step": 37220 }, { "epoch": 0.30797865740166275, "grad_norm": 1214.3897705078125, "learning_rate": 8.820686481971998e-06, "loss": 133.1782, "step": 37230 }, { "epoch": 0.30806138065103195, "grad_norm": 1155.935546875, "learning_rate": 8.81977656757988e-06, "loss": 91.5354, "step": 37240 }, { "epoch": 0.3081441039004012, "grad_norm": 1258.32275390625, "learning_rate": 8.81886634926567e-06, "loss": 108.436, "step": 37250 }, { "epoch": 0.30822682714977045, "grad_norm": 1130.4833984375, "learning_rate": 8.817955827101794e-06, "loss": 133.1508, "step": 37260 }, { "epoch": 0.30830955039913965, "grad_norm": 738.8419799804688, "learning_rate": 8.817045001160693e-06, "loss": 122.5803, "step": 37270 }, { "epoch": 0.3083922736485089, "grad_norm": 913.4530639648438, "learning_rate": 8.816133871514838e-06, "loss": 108.7282, "step": 37280 }, { "epoch": 0.30847499689787816, "grad_norm": 600.1754150390625, "learning_rate": 8.815222438236726e-06, "loss": 91.7117, "step": 37290 }, { "epoch": 0.30855772014724736, "grad_norm": 987.5612182617188, "learning_rate": 8.814310701398873e-06, "loss": 111.6003, "step": 37300 }, { "epoch": 0.3086404433966166, "grad_norm": 718.8853759765625, "learning_rate": 8.813398661073823e-06, "loss": 120.6641, "step": 37310 }, { "epoch": 0.30872316664598587, "grad_norm": 969.3789672851562, "learning_rate": 8.812486317334145e-06, "loss": 112.9521, "step": 37320 }, { "epoch": 0.30880588989535507, "grad_norm": 1069.70849609375, "learning_rate": 8.811573670252426e-06, "loss": 102.9678, "step": 37330 }, { "epoch": 0.3088886131447243, "grad_norm": 1160.0357666015625, "learning_rate": 8.810660719901283e-06, "loss": 142.8662, "step": 37340 }, { "epoch": 0.3089713363940936, "grad_norm": 923.1897583007812, "learning_rate": 8.809747466353356e-06, "loss": 134.5727, "step": 37350 }, { "epoch": 0.3090540596434628, "grad_norm": 1227.94580078125, "learning_rate": 8.808833909681305e-06, "loss": 144.0661, "step": 37360 }, { "epoch": 0.30913678289283203, "grad_norm": 636.2891235351562, "learning_rate": 8.80792004995782e-06, "loss": 120.0885, "step": 37370 }, { "epoch": 0.3092195061422013, "grad_norm": 1097.7366943359375, "learning_rate": 8.807005887255615e-06, "loss": 138.4748, "step": 37380 }, { "epoch": 0.3093022293915705, "grad_norm": 814.5217895507812, "learning_rate": 8.806091421647423e-06, "loss": 113.6995, "step": 37390 }, { "epoch": 0.30938495264093974, "grad_norm": 737.350341796875, "learning_rate": 8.805176653206004e-06, "loss": 116.3498, "step": 37400 }, { "epoch": 0.309467675890309, "grad_norm": 2945.936279296875, "learning_rate": 8.80426158200414e-06, "loss": 109.3581, "step": 37410 }, { "epoch": 0.3095503991396782, "grad_norm": 1349.6761474609375, "learning_rate": 8.803346208114643e-06, "loss": 117.0218, "step": 37420 }, { "epoch": 0.30963312238904744, "grad_norm": 744.024658203125, "learning_rate": 8.802430531610344e-06, "loss": 101.151, "step": 37430 }, { "epoch": 0.3097158456384167, "grad_norm": 1006.9130859375, "learning_rate": 8.801514552564097e-06, "loss": 87.0184, "step": 37440 }, { "epoch": 0.3097985688877859, "grad_norm": 1106.60693359375, "learning_rate": 8.800598271048784e-06, "loss": 159.6884, "step": 37450 }, { "epoch": 0.30988129213715515, "grad_norm": 1054.00244140625, "learning_rate": 8.799681687137309e-06, "loss": 97.845, "step": 37460 }, { "epoch": 0.3099640153865244, "grad_norm": 551.2701416015625, "learning_rate": 8.7987648009026e-06, "loss": 87.5821, "step": 37470 }, { "epoch": 0.3100467386358936, "grad_norm": 2352.765869140625, "learning_rate": 8.79784761241761e-06, "loss": 144.6846, "step": 37480 }, { "epoch": 0.31012946188526286, "grad_norm": 899.6213989257812, "learning_rate": 8.796930121755315e-06, "loss": 110.2097, "step": 37490 }, { "epoch": 0.3102121851346321, "grad_norm": 527.784912109375, "learning_rate": 8.796012328988716e-06, "loss": 98.5166, "step": 37500 }, { "epoch": 0.3102949083840013, "grad_norm": 1046.642822265625, "learning_rate": 8.795094234190837e-06, "loss": 94.5682, "step": 37510 }, { "epoch": 0.31037763163337057, "grad_norm": 1084.1600341796875, "learning_rate": 8.794175837434729e-06, "loss": 141.1946, "step": 37520 }, { "epoch": 0.3104603548827398, "grad_norm": 761.6773071289062, "learning_rate": 8.79325713879346e-06, "loss": 87.328, "step": 37530 }, { "epoch": 0.310543078132109, "grad_norm": 1374.055908203125, "learning_rate": 8.792338138340131e-06, "loss": 102.905, "step": 37540 }, { "epoch": 0.3106258013814783, "grad_norm": 935.9447631835938, "learning_rate": 8.791418836147858e-06, "loss": 146.4921, "step": 37550 }, { "epoch": 0.3107085246308475, "grad_norm": 1152.4400634765625, "learning_rate": 8.790499232289793e-06, "loss": 128.4351, "step": 37560 }, { "epoch": 0.3107912478802167, "grad_norm": 658.3630981445312, "learning_rate": 8.789579326839097e-06, "loss": 121.7294, "step": 37570 }, { "epoch": 0.310873971129586, "grad_norm": 830.0960083007812, "learning_rate": 8.788659119868966e-06, "loss": 133.9257, "step": 37580 }, { "epoch": 0.3109566943789552, "grad_norm": 866.0687255859375, "learning_rate": 8.787738611452616e-06, "loss": 116.4662, "step": 37590 }, { "epoch": 0.31103941762832443, "grad_norm": 1266.32177734375, "learning_rate": 8.78681780166329e-06, "loss": 120.4589, "step": 37600 }, { "epoch": 0.3111221408776937, "grad_norm": 876.0301513671875, "learning_rate": 8.785896690574248e-06, "loss": 103.7038, "step": 37610 }, { "epoch": 0.3112048641270629, "grad_norm": 822.327880859375, "learning_rate": 8.784975278258783e-06, "loss": 146.5088, "step": 37620 }, { "epoch": 0.31128758737643214, "grad_norm": 976.1931762695312, "learning_rate": 8.784053564790205e-06, "loss": 110.9248, "step": 37630 }, { "epoch": 0.3113703106258014, "grad_norm": 1754.4827880859375, "learning_rate": 8.783131550241853e-06, "loss": 132.1888, "step": 37640 }, { "epoch": 0.3114530338751706, "grad_norm": 889.1416015625, "learning_rate": 8.782209234687083e-06, "loss": 98.5607, "step": 37650 }, { "epoch": 0.31153575712453985, "grad_norm": 578.06689453125, "learning_rate": 8.781286618199285e-06, "loss": 93.0681, "step": 37660 }, { "epoch": 0.3116184803739091, "grad_norm": 625.0341796875, "learning_rate": 8.780363700851863e-06, "loss": 84.4234, "step": 37670 }, { "epoch": 0.3117012036232783, "grad_norm": 1180.2471923828125, "learning_rate": 8.779440482718251e-06, "loss": 126.0896, "step": 37680 }, { "epoch": 0.31178392687264755, "grad_norm": 1036.988525390625, "learning_rate": 8.778516963871904e-06, "loss": 109.445, "step": 37690 }, { "epoch": 0.3118666501220168, "grad_norm": 771.7061157226562, "learning_rate": 8.777593144386305e-06, "loss": 106.4233, "step": 37700 }, { "epoch": 0.311949373371386, "grad_norm": 853.5861206054688, "learning_rate": 8.776669024334955e-06, "loss": 149.2146, "step": 37710 }, { "epoch": 0.31203209662075526, "grad_norm": 1038.280029296875, "learning_rate": 8.775744603791385e-06, "loss": 87.6942, "step": 37720 }, { "epoch": 0.3121148198701245, "grad_norm": 785.39892578125, "learning_rate": 8.774819882829144e-06, "loss": 101.3138, "step": 37730 }, { "epoch": 0.3121975431194937, "grad_norm": 810.301513671875, "learning_rate": 8.77389486152181e-06, "loss": 104.7605, "step": 37740 }, { "epoch": 0.31228026636886297, "grad_norm": 907.918212890625, "learning_rate": 8.772969539942981e-06, "loss": 93.2778, "step": 37750 }, { "epoch": 0.3123629896182322, "grad_norm": 1630.78955078125, "learning_rate": 8.772043918166282e-06, "loss": 118.6197, "step": 37760 }, { "epoch": 0.3124457128676014, "grad_norm": 792.9736328125, "learning_rate": 8.771117996265358e-06, "loss": 110.336, "step": 37770 }, { "epoch": 0.3125284361169707, "grad_norm": 515.8519287109375, "learning_rate": 8.770191774313883e-06, "loss": 98.6496, "step": 37780 }, { "epoch": 0.31261115936633993, "grad_norm": 889.776611328125, "learning_rate": 8.769265252385552e-06, "loss": 165.6621, "step": 37790 }, { "epoch": 0.31269388261570913, "grad_norm": 780.7835693359375, "learning_rate": 8.768338430554083e-06, "loss": 91.5842, "step": 37800 }, { "epoch": 0.3127766058650784, "grad_norm": 1138.0804443359375, "learning_rate": 8.76741130889322e-06, "loss": 100.7134, "step": 37810 }, { "epoch": 0.31285932911444764, "grad_norm": 942.5914916992188, "learning_rate": 8.766483887476727e-06, "loss": 105.6115, "step": 37820 }, { "epoch": 0.31294205236381684, "grad_norm": 618.5468139648438, "learning_rate": 8.7655561663784e-06, "loss": 75.4478, "step": 37830 }, { "epoch": 0.3130247756131861, "grad_norm": 1693.069580078125, "learning_rate": 8.764628145672048e-06, "loss": 127.7167, "step": 37840 }, { "epoch": 0.31310749886255534, "grad_norm": 1026.7965087890625, "learning_rate": 8.763699825431513e-06, "loss": 138.911, "step": 37850 }, { "epoch": 0.31319022211192454, "grad_norm": 1648.09765625, "learning_rate": 8.762771205730656e-06, "loss": 117.1393, "step": 37860 }, { "epoch": 0.3132729453612938, "grad_norm": 933.2096557617188, "learning_rate": 8.761842286643362e-06, "loss": 96.2922, "step": 37870 }, { "epoch": 0.31335566861066305, "grad_norm": 802.694580078125, "learning_rate": 8.760913068243542e-06, "loss": 144.4842, "step": 37880 }, { "epoch": 0.31343839186003225, "grad_norm": 1243.0850830078125, "learning_rate": 8.759983550605132e-06, "loss": 128.3055, "step": 37890 }, { "epoch": 0.3135211151094015, "grad_norm": 1257.254638671875, "learning_rate": 8.759053733802083e-06, "loss": 100.2831, "step": 37900 }, { "epoch": 0.31360383835877076, "grad_norm": 934.783935546875, "learning_rate": 8.758123617908383e-06, "loss": 100.1143, "step": 37910 }, { "epoch": 0.31368656160813996, "grad_norm": 989.8101196289062, "learning_rate": 8.757193202998033e-06, "loss": 127.9963, "step": 37920 }, { "epoch": 0.3137692848575092, "grad_norm": 1011.3324584960938, "learning_rate": 8.756262489145061e-06, "loss": 112.5696, "step": 37930 }, { "epoch": 0.3138520081068784, "grad_norm": 925.4760131835938, "learning_rate": 8.755331476423526e-06, "loss": 79.9976, "step": 37940 }, { "epoch": 0.31393473135624766, "grad_norm": 932.2153930664062, "learning_rate": 8.754400164907496e-06, "loss": 118.1142, "step": 37950 }, { "epoch": 0.3140174546056169, "grad_norm": 869.6834106445312, "learning_rate": 8.753468554671078e-06, "loss": 122.9429, "step": 37960 }, { "epoch": 0.3141001778549861, "grad_norm": 1097.6431884765625, "learning_rate": 8.752536645788391e-06, "loss": 116.1235, "step": 37970 }, { "epoch": 0.31418290110435537, "grad_norm": 943.837158203125, "learning_rate": 8.751604438333587e-06, "loss": 120.1827, "step": 37980 }, { "epoch": 0.3142656243537246, "grad_norm": 965.2254028320312, "learning_rate": 8.750671932380834e-06, "loss": 111.8385, "step": 37990 }, { "epoch": 0.3143483476030938, "grad_norm": 2054.935546875, "learning_rate": 8.749739128004329e-06, "loss": 105.5353, "step": 38000 }, { "epoch": 0.3144310708524631, "grad_norm": 885.7263793945312, "learning_rate": 8.748806025278292e-06, "loss": 113.4429, "step": 38010 }, { "epoch": 0.31451379410183233, "grad_norm": 1407.1341552734375, "learning_rate": 8.747872624276963e-06, "loss": 99.107, "step": 38020 }, { "epoch": 0.31459651735120153, "grad_norm": 1092.1849365234375, "learning_rate": 8.746938925074609e-06, "loss": 130.1728, "step": 38030 }, { "epoch": 0.3146792406005708, "grad_norm": 977.42578125, "learning_rate": 8.746004927745522e-06, "loss": 116.9955, "step": 38040 }, { "epoch": 0.31476196384994004, "grad_norm": 813.2716064453125, "learning_rate": 8.745070632364014e-06, "loss": 103.2874, "step": 38050 }, { "epoch": 0.31484468709930924, "grad_norm": 823.4459838867188, "learning_rate": 8.744136039004422e-06, "loss": 122.7185, "step": 38060 }, { "epoch": 0.3149274103486785, "grad_norm": 675.5857543945312, "learning_rate": 8.743201147741112e-06, "loss": 117.528, "step": 38070 }, { "epoch": 0.31501013359804775, "grad_norm": 1719.105224609375, "learning_rate": 8.742265958648464e-06, "loss": 110.0581, "step": 38080 }, { "epoch": 0.31509285684741695, "grad_norm": 1066.7659912109375, "learning_rate": 8.741330471800888e-06, "loss": 89.2473, "step": 38090 }, { "epoch": 0.3151755800967862, "grad_norm": 776.9163818359375, "learning_rate": 8.740394687272817e-06, "loss": 124.1178, "step": 38100 }, { "epoch": 0.31525830334615546, "grad_norm": 760.9815063476562, "learning_rate": 8.739458605138706e-06, "loss": 119.1256, "step": 38110 }, { "epoch": 0.31534102659552465, "grad_norm": 974.4657592773438, "learning_rate": 8.738522225473036e-06, "loss": 105.6252, "step": 38120 }, { "epoch": 0.3154237498448939, "grad_norm": 634.0014038085938, "learning_rate": 8.737585548350312e-06, "loss": 119.6853, "step": 38130 }, { "epoch": 0.31550647309426316, "grad_norm": 765.5140380859375, "learning_rate": 8.736648573845057e-06, "loss": 99.3297, "step": 38140 }, { "epoch": 0.31558919634363236, "grad_norm": 1316.4825439453125, "learning_rate": 8.735711302031824e-06, "loss": 123.088, "step": 38150 }, { "epoch": 0.3156719195930016, "grad_norm": 1433.5438232421875, "learning_rate": 8.734773732985186e-06, "loss": 116.7357, "step": 38160 }, { "epoch": 0.31575464284237087, "grad_norm": 826.6362915039062, "learning_rate": 8.733835866779745e-06, "loss": 93.5203, "step": 38170 }, { "epoch": 0.31583736609174007, "grad_norm": 1368.983642578125, "learning_rate": 8.73289770349012e-06, "loss": 90.895, "step": 38180 }, { "epoch": 0.3159200893411093, "grad_norm": 764.3739013671875, "learning_rate": 8.731959243190955e-06, "loss": 115.1358, "step": 38190 }, { "epoch": 0.3160028125904786, "grad_norm": 1166.0986328125, "learning_rate": 8.73102048595692e-06, "loss": 110.7284, "step": 38200 }, { "epoch": 0.3160855358398478, "grad_norm": 900.78759765625, "learning_rate": 8.730081431862709e-06, "loss": 114.7286, "step": 38210 }, { "epoch": 0.31616825908921703, "grad_norm": 694.7711791992188, "learning_rate": 8.729142080983037e-06, "loss": 99.7621, "step": 38220 }, { "epoch": 0.3162509823385863, "grad_norm": 1478.10546875, "learning_rate": 8.728202433392645e-06, "loss": 103.1368, "step": 38230 }, { "epoch": 0.3163337055879555, "grad_norm": 739.2174682617188, "learning_rate": 8.727262489166295e-06, "loss": 91.7107, "step": 38240 }, { "epoch": 0.31641642883732474, "grad_norm": 1041.38525390625, "learning_rate": 8.726322248378775e-06, "loss": 133.2948, "step": 38250 }, { "epoch": 0.316499152086694, "grad_norm": 709.4502563476562, "learning_rate": 8.725381711104894e-06, "loss": 134.3007, "step": 38260 }, { "epoch": 0.3165818753360632, "grad_norm": 1081.0482177734375, "learning_rate": 8.724440877419487e-06, "loss": 107.7189, "step": 38270 }, { "epoch": 0.31666459858543244, "grad_norm": 1248.0484619140625, "learning_rate": 8.723499747397415e-06, "loss": 105.8039, "step": 38280 }, { "epoch": 0.3167473218348017, "grad_norm": 1431.8619384765625, "learning_rate": 8.722558321113555e-06, "loss": 108.0174, "step": 38290 }, { "epoch": 0.3168300450841709, "grad_norm": 972.2454223632812, "learning_rate": 8.721616598642812e-06, "loss": 124.3465, "step": 38300 }, { "epoch": 0.31691276833354015, "grad_norm": 1106.1824951171875, "learning_rate": 8.720674580060117e-06, "loss": 92.7966, "step": 38310 }, { "epoch": 0.31699549158290935, "grad_norm": 1259.550048828125, "learning_rate": 8.719732265440423e-06, "loss": 91.808, "step": 38320 }, { "epoch": 0.3170782148322786, "grad_norm": 1003.4591064453125, "learning_rate": 8.718789654858702e-06, "loss": 97.9086, "step": 38330 }, { "epoch": 0.31716093808164786, "grad_norm": 1087.510498046875, "learning_rate": 8.717846748389956e-06, "loss": 190.7288, "step": 38340 }, { "epoch": 0.31724366133101706, "grad_norm": 1307.7303466796875, "learning_rate": 8.716903546109208e-06, "loss": 100.8898, "step": 38350 }, { "epoch": 0.3173263845803863, "grad_norm": 389.17926025390625, "learning_rate": 8.715960048091502e-06, "loss": 105.3628, "step": 38360 }, { "epoch": 0.31740910782975557, "grad_norm": 634.9244995117188, "learning_rate": 8.715016254411908e-06, "loss": 93.6207, "step": 38370 }, { "epoch": 0.31749183107912476, "grad_norm": 1184.2578125, "learning_rate": 8.714072165145521e-06, "loss": 135.4022, "step": 38380 }, { "epoch": 0.317574554328494, "grad_norm": 619.5433349609375, "learning_rate": 8.713127780367458e-06, "loss": 82.4095, "step": 38390 }, { "epoch": 0.3176572775778633, "grad_norm": 1421.2760009765625, "learning_rate": 8.712183100152858e-06, "loss": 104.6683, "step": 38400 }, { "epoch": 0.31774000082723247, "grad_norm": 569.2383422851562, "learning_rate": 8.711238124576884e-06, "loss": 100.9128, "step": 38410 }, { "epoch": 0.3178227240766017, "grad_norm": 486.7202453613281, "learning_rate": 8.710292853714726e-06, "loss": 102.4042, "step": 38420 }, { "epoch": 0.317905447325971, "grad_norm": 1279.0054931640625, "learning_rate": 8.709347287641593e-06, "loss": 107.4948, "step": 38430 }, { "epoch": 0.3179881705753402, "grad_norm": 777.2363891601562, "learning_rate": 8.70840142643272e-06, "loss": 126.2411, "step": 38440 }, { "epoch": 0.31807089382470943, "grad_norm": 680.6844482421875, "learning_rate": 8.707455270163365e-06, "loss": 98.6142, "step": 38450 }, { "epoch": 0.3181536170740787, "grad_norm": 1288.6944580078125, "learning_rate": 8.70650881890881e-06, "loss": 125.0557, "step": 38460 }, { "epoch": 0.3182363403234479, "grad_norm": 760.6031494140625, "learning_rate": 8.705562072744358e-06, "loss": 138.251, "step": 38470 }, { "epoch": 0.31831906357281714, "grad_norm": 883.0706176757812, "learning_rate": 8.704615031745337e-06, "loss": 111.4153, "step": 38480 }, { "epoch": 0.3184017868221864, "grad_norm": 1131.5177001953125, "learning_rate": 8.703667695987102e-06, "loss": 113.8998, "step": 38490 }, { "epoch": 0.3184845100715556, "grad_norm": 973.4132690429688, "learning_rate": 8.702720065545024e-06, "loss": 106.239, "step": 38500 }, { "epoch": 0.31856723332092485, "grad_norm": 1401.8857421875, "learning_rate": 8.701772140494504e-06, "loss": 131.2176, "step": 38510 }, { "epoch": 0.3186499565702941, "grad_norm": 735.4816284179688, "learning_rate": 8.700823920910964e-06, "loss": 124.5568, "step": 38520 }, { "epoch": 0.3187326798196633, "grad_norm": 1129.6258544921875, "learning_rate": 8.699875406869848e-06, "loss": 103.7197, "step": 38530 }, { "epoch": 0.31881540306903255, "grad_norm": 802.9367065429688, "learning_rate": 8.69892659844663e-06, "loss": 129.3652, "step": 38540 }, { "epoch": 0.3188981263184018, "grad_norm": 1019.4291381835938, "learning_rate": 8.697977495716794e-06, "loss": 113.1963, "step": 38550 }, { "epoch": 0.318980849567771, "grad_norm": 1141.768798828125, "learning_rate": 8.697028098755863e-06, "loss": 75.0446, "step": 38560 }, { "epoch": 0.31906357281714026, "grad_norm": 561.0872192382812, "learning_rate": 8.69607840763937e-06, "loss": 107.2292, "step": 38570 }, { "epoch": 0.3191462960665095, "grad_norm": 567.6842651367188, "learning_rate": 8.695128422442882e-06, "loss": 105.8062, "step": 38580 }, { "epoch": 0.3192290193158787, "grad_norm": 1812.7880859375, "learning_rate": 8.694178143241984e-06, "loss": 116.8599, "step": 38590 }, { "epoch": 0.31931174256524797, "grad_norm": 821.3114013671875, "learning_rate": 8.693227570112285e-06, "loss": 113.9192, "step": 38600 }, { "epoch": 0.3193944658146172, "grad_norm": 1383.909423828125, "learning_rate": 8.692276703129421e-06, "loss": 123.0948, "step": 38610 }, { "epoch": 0.3194771890639864, "grad_norm": 559.3286743164062, "learning_rate": 8.691325542369041e-06, "loss": 81.4486, "step": 38620 }, { "epoch": 0.3195599123133557, "grad_norm": 1106.412109375, "learning_rate": 8.69037408790683e-06, "loss": 119.6945, "step": 38630 }, { "epoch": 0.31964263556272493, "grad_norm": 1364.677490234375, "learning_rate": 8.689422339818489e-06, "loss": 140.9282, "step": 38640 }, { "epoch": 0.31972535881209413, "grad_norm": 0.0, "learning_rate": 8.688470298179746e-06, "loss": 140.5661, "step": 38650 }, { "epoch": 0.3198080820614634, "grad_norm": 859.6204833984375, "learning_rate": 8.687517963066347e-06, "loss": 110.4718, "step": 38660 }, { "epoch": 0.3198908053108326, "grad_norm": 1258.269775390625, "learning_rate": 8.686565334554069e-06, "loss": 126.0004, "step": 38670 }, { "epoch": 0.31997352856020184, "grad_norm": 1039.6004638671875, "learning_rate": 8.685612412718704e-06, "loss": 119.8658, "step": 38680 }, { "epoch": 0.3200562518095711, "grad_norm": 923.8244018554688, "learning_rate": 8.684659197636076e-06, "loss": 124.2017, "step": 38690 }, { "epoch": 0.3201389750589403, "grad_norm": 1117.4451904296875, "learning_rate": 8.683705689382025e-06, "loss": 107.7295, "step": 38700 }, { "epoch": 0.32022169830830954, "grad_norm": 1363.1929931640625, "learning_rate": 8.682751888032419e-06, "loss": 99.3945, "step": 38710 }, { "epoch": 0.3203044215576788, "grad_norm": 701.418212890625, "learning_rate": 8.681797793663147e-06, "loss": 120.6914, "step": 38720 }, { "epoch": 0.320387144807048, "grad_norm": 1962.092041015625, "learning_rate": 8.680843406350122e-06, "loss": 105.0907, "step": 38730 }, { "epoch": 0.32046986805641725, "grad_norm": 1574.8798828125, "learning_rate": 8.679888726169277e-06, "loss": 123.2075, "step": 38740 }, { "epoch": 0.3205525913057865, "grad_norm": 979.2647094726562, "learning_rate": 8.678933753196577e-06, "loss": 117.9523, "step": 38750 }, { "epoch": 0.3206353145551557, "grad_norm": 961.4496459960938, "learning_rate": 8.677978487508002e-06, "loss": 130.495, "step": 38760 }, { "epoch": 0.32071803780452496, "grad_norm": 1239.4903564453125, "learning_rate": 8.677022929179558e-06, "loss": 116.15, "step": 38770 }, { "epoch": 0.3208007610538942, "grad_norm": 936.8592529296875, "learning_rate": 8.676067078287276e-06, "loss": 102.5058, "step": 38780 }, { "epoch": 0.3208834843032634, "grad_norm": 1102.019775390625, "learning_rate": 8.675110934907206e-06, "loss": 105.1739, "step": 38790 }, { "epoch": 0.32096620755263267, "grad_norm": 1137.979736328125, "learning_rate": 8.674154499115426e-06, "loss": 103.8995, "step": 38800 }, { "epoch": 0.3210489308020019, "grad_norm": 983.4441528320312, "learning_rate": 8.673197770988034e-06, "loss": 100.5983, "step": 38810 }, { "epoch": 0.3211316540513711, "grad_norm": 601.077392578125, "learning_rate": 8.672240750601152e-06, "loss": 100.5274, "step": 38820 }, { "epoch": 0.3212143773007404, "grad_norm": 897.4487915039062, "learning_rate": 8.67128343803093e-06, "loss": 99.9841, "step": 38830 }, { "epoch": 0.3212971005501096, "grad_norm": 1025.739013671875, "learning_rate": 8.670325833353532e-06, "loss": 89.5816, "step": 38840 }, { "epoch": 0.3213798237994788, "grad_norm": 677.2406005859375, "learning_rate": 8.669367936645152e-06, "loss": 105.6764, "step": 38850 }, { "epoch": 0.3214625470488481, "grad_norm": 912.070068359375, "learning_rate": 8.668409747982005e-06, "loss": 129.2276, "step": 38860 }, { "epoch": 0.32154527029821733, "grad_norm": 909.7315673828125, "learning_rate": 8.667451267440332e-06, "loss": 98.6507, "step": 38870 }, { "epoch": 0.32162799354758653, "grad_norm": 934.35400390625, "learning_rate": 8.666492495096391e-06, "loss": 121.8479, "step": 38880 }, { "epoch": 0.3217107167969558, "grad_norm": 1550.7830810546875, "learning_rate": 8.66553343102647e-06, "loss": 124.7869, "step": 38890 }, { "epoch": 0.32179344004632504, "grad_norm": 1913.998291015625, "learning_rate": 8.664574075306876e-06, "loss": 109.4713, "step": 38900 }, { "epoch": 0.32187616329569424, "grad_norm": 665.229736328125, "learning_rate": 8.66361442801394e-06, "loss": 94.5389, "step": 38910 }, { "epoch": 0.3219588865450635, "grad_norm": 762.2039794921875, "learning_rate": 8.662654489224018e-06, "loss": 94.5307, "step": 38920 }, { "epoch": 0.32204160979443275, "grad_norm": 1010.405029296875, "learning_rate": 8.661694259013489e-06, "loss": 143.722, "step": 38930 }, { "epoch": 0.32212433304380195, "grad_norm": 808.5379638671875, "learning_rate": 8.660733737458751e-06, "loss": 134.8724, "step": 38940 }, { "epoch": 0.3222070562931712, "grad_norm": 964.7113037109375, "learning_rate": 8.659772924636232e-06, "loss": 122.8288, "step": 38950 }, { "epoch": 0.32228977954254046, "grad_norm": 908.0009765625, "learning_rate": 8.658811820622376e-06, "loss": 90.802, "step": 38960 }, { "epoch": 0.32237250279190965, "grad_norm": 1564.2470703125, "learning_rate": 8.657850425493656e-06, "loss": 129.7668, "step": 38970 }, { "epoch": 0.3224552260412789, "grad_norm": 1255.0084228515625, "learning_rate": 8.656888739326564e-06, "loss": 96.6529, "step": 38980 }, { "epoch": 0.32253794929064816, "grad_norm": 960.5479125976562, "learning_rate": 8.65592676219762e-06, "loss": 103.2335, "step": 38990 }, { "epoch": 0.32262067254001736, "grad_norm": 815.9246826171875, "learning_rate": 8.65496449418336e-06, "loss": 94.21, "step": 39000 }, { "epoch": 0.3227033957893866, "grad_norm": 942.2803344726562, "learning_rate": 8.654001935360349e-06, "loss": 108.4447, "step": 39010 }, { "epoch": 0.32278611903875587, "grad_norm": 1212.7076416015625, "learning_rate": 8.653039085805174e-06, "loss": 97.4576, "step": 39020 }, { "epoch": 0.32286884228812507, "grad_norm": 1259.3065185546875, "learning_rate": 8.652075945594444e-06, "loss": 96.3901, "step": 39030 }, { "epoch": 0.3229515655374943, "grad_norm": 673.8052368164062, "learning_rate": 8.651112514804793e-06, "loss": 94.6694, "step": 39040 }, { "epoch": 0.3230342887868635, "grad_norm": 1086.0980224609375, "learning_rate": 8.650148793512874e-06, "loss": 161.4135, "step": 39050 }, { "epoch": 0.3231170120362328, "grad_norm": 1190.9241943359375, "learning_rate": 8.649184781795367e-06, "loss": 122.2091, "step": 39060 }, { "epoch": 0.32319973528560203, "grad_norm": 750.9871215820312, "learning_rate": 8.648220479728976e-06, "loss": 129.1647, "step": 39070 }, { "epoch": 0.32328245853497123, "grad_norm": 675.1224365234375, "learning_rate": 8.647255887390425e-06, "loss": 87.7561, "step": 39080 }, { "epoch": 0.3233651817843405, "grad_norm": 960.1796875, "learning_rate": 8.64629100485646e-06, "loss": 107.4958, "step": 39090 }, { "epoch": 0.32344790503370974, "grad_norm": 1386.735595703125, "learning_rate": 8.645325832203855e-06, "loss": 135.6421, "step": 39100 }, { "epoch": 0.32353062828307894, "grad_norm": 589.93896484375, "learning_rate": 8.644360369509403e-06, "loss": 102.7022, "step": 39110 }, { "epoch": 0.3236133515324482, "grad_norm": 1394.318603515625, "learning_rate": 8.64339461684992e-06, "loss": 109.0257, "step": 39120 }, { "epoch": 0.32369607478181744, "grad_norm": 1033.558349609375, "learning_rate": 8.64242857430225e-06, "loss": 120.0752, "step": 39130 }, { "epoch": 0.32377879803118664, "grad_norm": 877.220947265625, "learning_rate": 8.641462241943255e-06, "loss": 149.2554, "step": 39140 }, { "epoch": 0.3238615212805559, "grad_norm": 1799.2706298828125, "learning_rate": 8.640495619849821e-06, "loss": 106.8699, "step": 39150 }, { "epoch": 0.32394424452992515, "grad_norm": 354.8163146972656, "learning_rate": 8.639528708098858e-06, "loss": 138.8774, "step": 39160 }, { "epoch": 0.32402696777929435, "grad_norm": 734.3604125976562, "learning_rate": 8.6385615067673e-06, "loss": 113.2349, "step": 39170 }, { "epoch": 0.3241096910286636, "grad_norm": 1038.3038330078125, "learning_rate": 8.6375940159321e-06, "loss": 80.891, "step": 39180 }, { "epoch": 0.32419241427803286, "grad_norm": 1265.29638671875, "learning_rate": 8.63662623567024e-06, "loss": 100.5664, "step": 39190 }, { "epoch": 0.32427513752740206, "grad_norm": 975.9793701171875, "learning_rate": 8.63565816605872e-06, "loss": 98.3975, "step": 39200 }, { "epoch": 0.3243578607767713, "grad_norm": 1260.0736083984375, "learning_rate": 8.634689807174564e-06, "loss": 122.2016, "step": 39210 }, { "epoch": 0.32444058402614057, "grad_norm": 745.4339599609375, "learning_rate": 8.633721159094823e-06, "loss": 118.936, "step": 39220 }, { "epoch": 0.32452330727550976, "grad_norm": 803.7348022460938, "learning_rate": 8.632752221896562e-06, "loss": 76.6836, "step": 39230 }, { "epoch": 0.324606030524879, "grad_norm": 901.3794555664062, "learning_rate": 8.631782995656884e-06, "loss": 114.3698, "step": 39240 }, { "epoch": 0.3246887537742483, "grad_norm": 2110.507080078125, "learning_rate": 8.630813480452898e-06, "loss": 109.549, "step": 39250 }, { "epoch": 0.32477147702361747, "grad_norm": 868.3207397460938, "learning_rate": 8.629843676361747e-06, "loss": 147.7418, "step": 39260 }, { "epoch": 0.3248542002729867, "grad_norm": 776.3412475585938, "learning_rate": 8.628873583460593e-06, "loss": 126.7695, "step": 39270 }, { "epoch": 0.324936923522356, "grad_norm": 1359.6387939453125, "learning_rate": 8.627903201826622e-06, "loss": 120.8187, "step": 39280 }, { "epoch": 0.3250196467717252, "grad_norm": 1282.950439453125, "learning_rate": 8.626932531537042e-06, "loss": 123.0786, "step": 39290 }, { "epoch": 0.32510237002109443, "grad_norm": 530.9898071289062, "learning_rate": 8.625961572669087e-06, "loss": 120.1885, "step": 39300 }, { "epoch": 0.3251850932704637, "grad_norm": 997.0206298828125, "learning_rate": 8.62499032530001e-06, "loss": 120.9154, "step": 39310 }, { "epoch": 0.3252678165198329, "grad_norm": 676.1962890625, "learning_rate": 8.624018789507091e-06, "loss": 97.3104, "step": 39320 }, { "epoch": 0.32535053976920214, "grad_norm": 692.87255859375, "learning_rate": 8.62304696536763e-06, "loss": 118.6817, "step": 39330 }, { "epoch": 0.3254332630185714, "grad_norm": 980.8681030273438, "learning_rate": 8.622074852958946e-06, "loss": 112.1015, "step": 39340 }, { "epoch": 0.3255159862679406, "grad_norm": 1021.6939697265625, "learning_rate": 8.621102452358393e-06, "loss": 190.7402, "step": 39350 }, { "epoch": 0.32559870951730985, "grad_norm": 1223.068115234375, "learning_rate": 8.620129763643333e-06, "loss": 128.2917, "step": 39360 }, { "epoch": 0.3256814327666791, "grad_norm": 1219.437744140625, "learning_rate": 8.619156786891162e-06, "loss": 136.7339, "step": 39370 }, { "epoch": 0.3257641560160483, "grad_norm": 1412.034912109375, "learning_rate": 8.618183522179295e-06, "loss": 131.6702, "step": 39380 }, { "epoch": 0.32584687926541755, "grad_norm": 1203.2330322265625, "learning_rate": 8.617209969585171e-06, "loss": 88.7958, "step": 39390 }, { "epoch": 0.32592960251478675, "grad_norm": 809.6597290039062, "learning_rate": 8.616236129186252e-06, "loss": 102.6644, "step": 39400 }, { "epoch": 0.326012325764156, "grad_norm": 3100.604736328125, "learning_rate": 8.615262001060019e-06, "loss": 176.8819, "step": 39410 }, { "epoch": 0.32609504901352526, "grad_norm": 1474.5286865234375, "learning_rate": 8.61428758528398e-06, "loss": 123.6517, "step": 39420 }, { "epoch": 0.32617777226289446, "grad_norm": 893.0943603515625, "learning_rate": 8.613312881935667e-06, "loss": 118.6461, "step": 39430 }, { "epoch": 0.3262604955122637, "grad_norm": 1108.7327880859375, "learning_rate": 8.61233789109263e-06, "loss": 129.676, "step": 39440 }, { "epoch": 0.32634321876163297, "grad_norm": 799.1575317382812, "learning_rate": 8.611362612832445e-06, "loss": 109.5865, "step": 39450 }, { "epoch": 0.32642594201100217, "grad_norm": 1402.5484619140625, "learning_rate": 8.610387047232711e-06, "loss": 103.1031, "step": 39460 }, { "epoch": 0.3265086652603714, "grad_norm": 1590.6834716796875, "learning_rate": 8.609411194371049e-06, "loss": 114.6393, "step": 39470 }, { "epoch": 0.3265913885097407, "grad_norm": 1389.5260009765625, "learning_rate": 8.608435054325103e-06, "loss": 100.0405, "step": 39480 }, { "epoch": 0.3266741117591099, "grad_norm": 1510.9293212890625, "learning_rate": 8.60745862717254e-06, "loss": 78.9952, "step": 39490 }, { "epoch": 0.32675683500847913, "grad_norm": 2515.405029296875, "learning_rate": 8.606481912991052e-06, "loss": 125.8343, "step": 39500 }, { "epoch": 0.3268395582578484, "grad_norm": 1044.6246337890625, "learning_rate": 8.605504911858347e-06, "loss": 95.9947, "step": 39510 }, { "epoch": 0.3269222815072176, "grad_norm": 742.9393920898438, "learning_rate": 8.604527623852165e-06, "loss": 129.3403, "step": 39520 }, { "epoch": 0.32700500475658684, "grad_norm": 843.3123779296875, "learning_rate": 8.603550049050262e-06, "loss": 124.6452, "step": 39530 }, { "epoch": 0.3270877280059561, "grad_norm": 629.9082641601562, "learning_rate": 8.602572187530421e-06, "loss": 103.7542, "step": 39540 }, { "epoch": 0.3271704512553253, "grad_norm": 1079.95556640625, "learning_rate": 8.601594039370441e-06, "loss": 117.5058, "step": 39550 }, { "epoch": 0.32725317450469454, "grad_norm": 785.5013427734375, "learning_rate": 8.600615604648155e-06, "loss": 67.8067, "step": 39560 }, { "epoch": 0.3273358977540638, "grad_norm": 831.0355224609375, "learning_rate": 8.599636883441408e-06, "loss": 123.9131, "step": 39570 }, { "epoch": 0.327418621003433, "grad_norm": 2647.583251953125, "learning_rate": 8.598657875828078e-06, "loss": 107.162, "step": 39580 }, { "epoch": 0.32750134425280225, "grad_norm": 2026.9219970703125, "learning_rate": 8.597678581886055e-06, "loss": 130.6936, "step": 39590 }, { "epoch": 0.3275840675021715, "grad_norm": 991.1618041992188, "learning_rate": 8.596699001693257e-06, "loss": 107.1374, "step": 39600 }, { "epoch": 0.3276667907515407, "grad_norm": 1521.5604248046875, "learning_rate": 8.595719135327627e-06, "loss": 98.5977, "step": 39610 }, { "epoch": 0.32774951400090996, "grad_norm": 613.502685546875, "learning_rate": 8.594738982867126e-06, "loss": 100.9653, "step": 39620 }, { "epoch": 0.3278322372502792, "grad_norm": 1424.407470703125, "learning_rate": 8.593758544389743e-06, "loss": 119.1369, "step": 39630 }, { "epoch": 0.3279149604996484, "grad_norm": 519.1179809570312, "learning_rate": 8.592777819973486e-06, "loss": 121.2218, "step": 39640 }, { "epoch": 0.32799768374901767, "grad_norm": 1449.661865234375, "learning_rate": 8.591796809696386e-06, "loss": 114.1455, "step": 39650 }, { "epoch": 0.3280804069983869, "grad_norm": 986.8948364257812, "learning_rate": 8.590815513636498e-06, "loss": 111.6402, "step": 39660 }, { "epoch": 0.3281631302477561, "grad_norm": 839.0859375, "learning_rate": 8.5898339318719e-06, "loss": 85.4794, "step": 39670 }, { "epoch": 0.3282458534971254, "grad_norm": 1216.8238525390625, "learning_rate": 8.58885206448069e-06, "loss": 126.5229, "step": 39680 }, { "epoch": 0.3283285767464946, "grad_norm": 1210.4658203125, "learning_rate": 8.587869911540993e-06, "loss": 131.2425, "step": 39690 }, { "epoch": 0.3284112999958638, "grad_norm": 638.7323608398438, "learning_rate": 8.586887473130951e-06, "loss": 117.0074, "step": 39700 }, { "epoch": 0.3284940232452331, "grad_norm": 1674.9326171875, "learning_rate": 8.585904749328736e-06, "loss": 101.3178, "step": 39710 }, { "epoch": 0.32857674649460233, "grad_norm": 730.9718627929688, "learning_rate": 8.584921740212537e-06, "loss": 79.7682, "step": 39720 }, { "epoch": 0.32865946974397153, "grad_norm": 849.2908325195312, "learning_rate": 8.583938445860569e-06, "loss": 134.0528, "step": 39730 }, { "epoch": 0.3287421929933408, "grad_norm": 1976.713134765625, "learning_rate": 8.582954866351065e-06, "loss": 109.2086, "step": 39740 }, { "epoch": 0.32882491624271004, "grad_norm": 1140.69140625, "learning_rate": 8.581971001762287e-06, "loss": 115.7576, "step": 39750 }, { "epoch": 0.32890763949207924, "grad_norm": 1253.4771728515625, "learning_rate": 8.580986852172514e-06, "loss": 99.3701, "step": 39760 }, { "epoch": 0.3289903627414485, "grad_norm": 1577.8370361328125, "learning_rate": 8.580002417660054e-06, "loss": 137.5488, "step": 39770 }, { "epoch": 0.3290730859908177, "grad_norm": 759.9320068359375, "learning_rate": 8.579017698303228e-06, "loss": 110.6118, "step": 39780 }, { "epoch": 0.32915580924018695, "grad_norm": 807.0444946289062, "learning_rate": 8.578032694180394e-06, "loss": 108.5404, "step": 39790 }, { "epoch": 0.3292385324895562, "grad_norm": 901.5609741210938, "learning_rate": 8.577047405369916e-06, "loss": 92.3528, "step": 39800 }, { "epoch": 0.3293212557389254, "grad_norm": 815.9768676757812, "learning_rate": 8.576061831950193e-06, "loss": 116.8808, "step": 39810 }, { "epoch": 0.32940397898829465, "grad_norm": 676.6227416992188, "learning_rate": 8.575075973999642e-06, "loss": 104.0332, "step": 39820 }, { "epoch": 0.3294867022376639, "grad_norm": 655.098876953125, "learning_rate": 8.574089831596703e-06, "loss": 114.4098, "step": 39830 }, { "epoch": 0.3295694254870331, "grad_norm": 1169.68359375, "learning_rate": 8.57310340481984e-06, "loss": 88.758, "step": 39840 }, { "epoch": 0.32965214873640236, "grad_norm": 635.2750244140625, "learning_rate": 8.572116693747537e-06, "loss": 98.1875, "step": 39850 }, { "epoch": 0.3297348719857716, "grad_norm": 797.3588256835938, "learning_rate": 8.571129698458302e-06, "loss": 101.033, "step": 39860 }, { "epoch": 0.3298175952351408, "grad_norm": 1276.1683349609375, "learning_rate": 8.570142419030668e-06, "loss": 111.7359, "step": 39870 }, { "epoch": 0.32990031848451007, "grad_norm": 700.5169677734375, "learning_rate": 8.569154855543184e-06, "loss": 101.829, "step": 39880 }, { "epoch": 0.3299830417338793, "grad_norm": 1665.83984375, "learning_rate": 8.56816700807443e-06, "loss": 120.2223, "step": 39890 }, { "epoch": 0.3300657649832485, "grad_norm": 619.46435546875, "learning_rate": 8.567178876703002e-06, "loss": 101.4117, "step": 39900 }, { "epoch": 0.3301484882326178, "grad_norm": 1413.1790771484375, "learning_rate": 8.566190461507521e-06, "loss": 108.3938, "step": 39910 }, { "epoch": 0.33023121148198703, "grad_norm": 917.2999877929688, "learning_rate": 8.565201762566632e-06, "loss": 80.6623, "step": 39920 }, { "epoch": 0.33031393473135623, "grad_norm": 787.8756713867188, "learning_rate": 8.564212779959003e-06, "loss": 130.3724, "step": 39930 }, { "epoch": 0.3303966579807255, "grad_norm": 649.1900634765625, "learning_rate": 8.563223513763319e-06, "loss": 107.5673, "step": 39940 }, { "epoch": 0.33047938123009474, "grad_norm": 656.0354614257812, "learning_rate": 8.562233964058294e-06, "loss": 138.9998, "step": 39950 }, { "epoch": 0.33056210447946394, "grad_norm": 1630.0479736328125, "learning_rate": 8.561244130922658e-06, "loss": 79.6873, "step": 39960 }, { "epoch": 0.3306448277288332, "grad_norm": 908.9981689453125, "learning_rate": 8.560254014435172e-06, "loss": 124.0382, "step": 39970 }, { "epoch": 0.33072755097820244, "grad_norm": 1071.929931640625, "learning_rate": 8.559263614674615e-06, "loss": 102.3747, "step": 39980 }, { "epoch": 0.33081027422757164, "grad_norm": 827.8428344726562, "learning_rate": 8.558272931719785e-06, "loss": 100.2324, "step": 39990 }, { "epoch": 0.3308929974769409, "grad_norm": 2593.23779296875, "learning_rate": 8.557281965649508e-06, "loss": 107.4415, "step": 40000 }, { "epoch": 0.33097572072631015, "grad_norm": 1010.6577758789062, "learning_rate": 8.556290716542632e-06, "loss": 84.2611, "step": 40010 }, { "epoch": 0.33105844397567935, "grad_norm": 787.0457153320312, "learning_rate": 8.555299184478026e-06, "loss": 129.8781, "step": 40020 }, { "epoch": 0.3311411672250486, "grad_norm": 953.2777099609375, "learning_rate": 8.554307369534577e-06, "loss": 103.5916, "step": 40030 }, { "epoch": 0.33122389047441786, "grad_norm": 1413.6817626953125, "learning_rate": 8.553315271791207e-06, "loss": 92.5186, "step": 40040 }, { "epoch": 0.33130661372378706, "grad_norm": 1188.9342041015625, "learning_rate": 8.552322891326846e-06, "loss": 98.2379, "step": 40050 }, { "epoch": 0.3313893369731563, "grad_norm": 1236.0517578125, "learning_rate": 8.551330228220454e-06, "loss": 107.0516, "step": 40060 }, { "epoch": 0.33147206022252557, "grad_norm": 919.5791015625, "learning_rate": 8.550337282551016e-06, "loss": 101.3186, "step": 40070 }, { "epoch": 0.33155478347189477, "grad_norm": 1503.684326171875, "learning_rate": 8.549344054397533e-06, "loss": 108.6517, "step": 40080 }, { "epoch": 0.331637506721264, "grad_norm": 419.35028076171875, "learning_rate": 8.548350543839034e-06, "loss": 85.9801, "step": 40090 }, { "epoch": 0.3317202299706333, "grad_norm": 1511.6007080078125, "learning_rate": 8.547356750954568e-06, "loss": 109.9655, "step": 40100 }, { "epoch": 0.3318029532200025, "grad_norm": 811.8706665039062, "learning_rate": 8.546362675823204e-06, "loss": 186.2005, "step": 40110 }, { "epoch": 0.3318856764693717, "grad_norm": 910.4181518554688, "learning_rate": 8.545368318524036e-06, "loss": 113.1569, "step": 40120 }, { "epoch": 0.3319683997187409, "grad_norm": 841.266357421875, "learning_rate": 8.544373679136184e-06, "loss": 102.7385, "step": 40130 }, { "epoch": 0.3320511229681102, "grad_norm": 1087.2705078125, "learning_rate": 8.543378757738785e-06, "loss": 83.0132, "step": 40140 }, { "epoch": 0.33213384621747943, "grad_norm": 844.1511840820312, "learning_rate": 8.542383554411e-06, "loss": 97.4727, "step": 40150 }, { "epoch": 0.33221656946684863, "grad_norm": 1138.1463623046875, "learning_rate": 8.541388069232012e-06, "loss": 96.1207, "step": 40160 }, { "epoch": 0.3322992927162179, "grad_norm": 1386.313232421875, "learning_rate": 8.54039230228103e-06, "loss": 121.9133, "step": 40170 }, { "epoch": 0.33238201596558714, "grad_norm": 1644.196044921875, "learning_rate": 8.53939625363728e-06, "loss": 110.2846, "step": 40180 }, { "epoch": 0.33246473921495634, "grad_norm": 663.9628295898438, "learning_rate": 8.538399923380011e-06, "loss": 147.1378, "step": 40190 }, { "epoch": 0.3325474624643256, "grad_norm": 921.0543212890625, "learning_rate": 8.537403311588502e-06, "loss": 94.7127, "step": 40200 }, { "epoch": 0.33263018571369485, "grad_norm": 1512.567626953125, "learning_rate": 8.536406418342044e-06, "loss": 87.8837, "step": 40210 }, { "epoch": 0.33271290896306405, "grad_norm": 1098.78369140625, "learning_rate": 8.53540924371996e-06, "loss": 108.6445, "step": 40220 }, { "epoch": 0.3327956322124333, "grad_norm": 1846.2921142578125, "learning_rate": 8.534411787801586e-06, "loss": 95.6519, "step": 40230 }, { "epoch": 0.33287835546180256, "grad_norm": 1103.0107421875, "learning_rate": 8.533414050666287e-06, "loss": 109.1561, "step": 40240 }, { "epoch": 0.33296107871117175, "grad_norm": 903.0642700195312, "learning_rate": 8.532416032393447e-06, "loss": 99.2833, "step": 40250 }, { "epoch": 0.333043801960541, "grad_norm": 1340.9583740234375, "learning_rate": 8.531417733062476e-06, "loss": 116.9413, "step": 40260 }, { "epoch": 0.33312652520991026, "grad_norm": 1119.4525146484375, "learning_rate": 8.530419152752804e-06, "loss": 124.4811, "step": 40270 }, { "epoch": 0.33320924845927946, "grad_norm": 1566.3560791015625, "learning_rate": 8.529420291543882e-06, "loss": 127.6215, "step": 40280 }, { "epoch": 0.3332919717086487, "grad_norm": 1316.3895263671875, "learning_rate": 8.528421149515185e-06, "loss": 107.4906, "step": 40290 }, { "epoch": 0.33337469495801797, "grad_norm": 825.5098266601562, "learning_rate": 8.52742172674621e-06, "loss": 129.04, "step": 40300 }, { "epoch": 0.33345741820738717, "grad_norm": 1096.2099609375, "learning_rate": 8.526422023316478e-06, "loss": 91.2496, "step": 40310 }, { "epoch": 0.3335401414567564, "grad_norm": 781.985107421875, "learning_rate": 8.525422039305529e-06, "loss": 124.4936, "step": 40320 }, { "epoch": 0.3336228647061257, "grad_norm": 1158.1458740234375, "learning_rate": 8.524421774792926e-06, "loss": 84.0795, "step": 40330 }, { "epoch": 0.3337055879554949, "grad_norm": 1207.7098388671875, "learning_rate": 8.52342122985826e-06, "loss": 91.0112, "step": 40340 }, { "epoch": 0.33378831120486413, "grad_norm": 847.1415405273438, "learning_rate": 8.522420404581135e-06, "loss": 101.6437, "step": 40350 }, { "epoch": 0.3338710344542334, "grad_norm": 995.0086669921875, "learning_rate": 8.521419299041185e-06, "loss": 93.0817, "step": 40360 }, { "epoch": 0.3339537577036026, "grad_norm": 892.8756103515625, "learning_rate": 8.520417913318065e-06, "loss": 110.2036, "step": 40370 }, { "epoch": 0.33403648095297184, "grad_norm": 1183.2650146484375, "learning_rate": 8.519416247491445e-06, "loss": 126.0844, "step": 40380 }, { "epoch": 0.3341192042023411, "grad_norm": 605.9612426757812, "learning_rate": 8.518414301641027e-06, "loss": 69.7784, "step": 40390 }, { "epoch": 0.3342019274517103, "grad_norm": 1190.9295654296875, "learning_rate": 8.517412075846529e-06, "loss": 138.7514, "step": 40400 }, { "epoch": 0.33428465070107954, "grad_norm": 792.2731323242188, "learning_rate": 8.516409570187698e-06, "loss": 98.212, "step": 40410 }, { "epoch": 0.3343673739504488, "grad_norm": 820.3512573242188, "learning_rate": 8.515406784744294e-06, "loss": 87.1192, "step": 40420 }, { "epoch": 0.334450097199818, "grad_norm": 1030.4779052734375, "learning_rate": 8.514403719596104e-06, "loss": 112.4568, "step": 40430 }, { "epoch": 0.33453282044918725, "grad_norm": 649.7733764648438, "learning_rate": 8.513400374822942e-06, "loss": 129.4392, "step": 40440 }, { "epoch": 0.3346155436985565, "grad_norm": 1169.7542724609375, "learning_rate": 8.512396750504635e-06, "loss": 96.2116, "step": 40450 }, { "epoch": 0.3346982669479257, "grad_norm": 1340.5599365234375, "learning_rate": 8.511392846721037e-06, "loss": 130.6511, "step": 40460 }, { "epoch": 0.33478099019729496, "grad_norm": 1084.3248291015625, "learning_rate": 8.510388663552027e-06, "loss": 96.2522, "step": 40470 }, { "epoch": 0.33486371344666416, "grad_norm": 940.4993896484375, "learning_rate": 8.509384201077502e-06, "loss": 182.6661, "step": 40480 }, { "epoch": 0.3349464366960334, "grad_norm": 473.4619445800781, "learning_rate": 8.508379459377381e-06, "loss": 98.2326, "step": 40490 }, { "epoch": 0.33502915994540267, "grad_norm": 915.9678344726562, "learning_rate": 8.507374438531606e-06, "loss": 90.029, "step": 40500 }, { "epoch": 0.33511188319477186, "grad_norm": 644.4666748046875, "learning_rate": 8.506369138620148e-06, "loss": 160.186, "step": 40510 }, { "epoch": 0.3351946064441411, "grad_norm": 771.171875, "learning_rate": 8.505363559722985e-06, "loss": 96.5032, "step": 40520 }, { "epoch": 0.3352773296935104, "grad_norm": 575.1885375976562, "learning_rate": 8.504357701920134e-06, "loss": 78.7146, "step": 40530 }, { "epoch": 0.33536005294287957, "grad_norm": 728.8525390625, "learning_rate": 8.503351565291622e-06, "loss": 130.1776, "step": 40540 }, { "epoch": 0.3354427761922488, "grad_norm": 1049.326416015625, "learning_rate": 8.502345149917506e-06, "loss": 91.4142, "step": 40550 }, { "epoch": 0.3355254994416181, "grad_norm": 1567.1068115234375, "learning_rate": 8.501338455877859e-06, "loss": 128.1109, "step": 40560 }, { "epoch": 0.3356082226909873, "grad_norm": 639.6390380859375, "learning_rate": 8.50033148325278e-06, "loss": 89.2162, "step": 40570 }, { "epoch": 0.33569094594035653, "grad_norm": 1309.4464111328125, "learning_rate": 8.499324232122389e-06, "loss": 119.5868, "step": 40580 }, { "epoch": 0.3357736691897258, "grad_norm": 961.0704345703125, "learning_rate": 8.498316702566828e-06, "loss": 108.5671, "step": 40590 }, { "epoch": 0.335856392439095, "grad_norm": 1294.3653564453125, "learning_rate": 8.497308894666263e-06, "loss": 114.0025, "step": 40600 }, { "epoch": 0.33593911568846424, "grad_norm": 1210.564697265625, "learning_rate": 8.496300808500878e-06, "loss": 122.3642, "step": 40610 }, { "epoch": 0.3360218389378335, "grad_norm": 1512.3568115234375, "learning_rate": 8.495292444150887e-06, "loss": 146.3031, "step": 40620 }, { "epoch": 0.3361045621872027, "grad_norm": 956.16162109375, "learning_rate": 8.494283801696514e-06, "loss": 140.3855, "step": 40630 }, { "epoch": 0.33618728543657195, "grad_norm": 862.5502319335938, "learning_rate": 8.493274881218017e-06, "loss": 92.3681, "step": 40640 }, { "epoch": 0.3362700086859412, "grad_norm": 715.0096435546875, "learning_rate": 8.49226568279567e-06, "loss": 93.8356, "step": 40650 }, { "epoch": 0.3363527319353104, "grad_norm": 858.4218139648438, "learning_rate": 8.49125620650977e-06, "loss": 89.4689, "step": 40660 }, { "epoch": 0.33643545518467965, "grad_norm": 797.36328125, "learning_rate": 8.490246452440636e-06, "loss": 131.3191, "step": 40670 }, { "epoch": 0.3365181784340489, "grad_norm": 887.1189575195312, "learning_rate": 8.48923642066861e-06, "loss": 143.5478, "step": 40680 }, { "epoch": 0.3366009016834181, "grad_norm": 688.75732421875, "learning_rate": 8.488226111274055e-06, "loss": 129.7013, "step": 40690 }, { "epoch": 0.33668362493278736, "grad_norm": 770.3273315429688, "learning_rate": 8.487215524337357e-06, "loss": 77.0376, "step": 40700 }, { "epoch": 0.3367663481821566, "grad_norm": 960.6439819335938, "learning_rate": 8.486204659938924e-06, "loss": 127.0703, "step": 40710 }, { "epoch": 0.3368490714315258, "grad_norm": 764.961669921875, "learning_rate": 8.485193518159186e-06, "loss": 111.9176, "step": 40720 }, { "epoch": 0.33693179468089507, "grad_norm": 900.38134765625, "learning_rate": 8.484182099078596e-06, "loss": 112.1536, "step": 40730 }, { "epoch": 0.3370145179302643, "grad_norm": 960.113525390625, "learning_rate": 8.483170402777624e-06, "loss": 127.3206, "step": 40740 }, { "epoch": 0.3370972411796335, "grad_norm": 1646.762939453125, "learning_rate": 8.482158429336769e-06, "loss": 143.5467, "step": 40750 }, { "epoch": 0.3371799644290028, "grad_norm": 985.1450805664062, "learning_rate": 8.48114617883655e-06, "loss": 114.6298, "step": 40760 }, { "epoch": 0.33726268767837203, "grad_norm": 759.6780395507812, "learning_rate": 8.480133651357507e-06, "loss": 116.8154, "step": 40770 }, { "epoch": 0.33734541092774123, "grad_norm": 885.9656982421875, "learning_rate": 8.479120846980197e-06, "loss": 108.7685, "step": 40780 }, { "epoch": 0.3374281341771105, "grad_norm": 575.7235717773438, "learning_rate": 8.478107765785212e-06, "loss": 88.1911, "step": 40790 }, { "epoch": 0.33751085742647974, "grad_norm": 666.4598388671875, "learning_rate": 8.477094407853153e-06, "loss": 69.0146, "step": 40800 }, { "epoch": 0.33759358067584894, "grad_norm": 645.81201171875, "learning_rate": 8.47608077326465e-06, "loss": 94.4932, "step": 40810 }, { "epoch": 0.3376763039252182, "grad_norm": 994.3779907226562, "learning_rate": 8.475066862100352e-06, "loss": 88.9415, "step": 40820 }, { "epoch": 0.33775902717458745, "grad_norm": 759.6505737304688, "learning_rate": 8.474052674440934e-06, "loss": 117.1598, "step": 40830 }, { "epoch": 0.33784175042395664, "grad_norm": 869.6111450195312, "learning_rate": 8.473038210367086e-06, "loss": 78.2449, "step": 40840 }, { "epoch": 0.3379244736733259, "grad_norm": 947.123046875, "learning_rate": 8.47202346995953e-06, "loss": 93.2359, "step": 40850 }, { "epoch": 0.3380071969226951, "grad_norm": 1361.4940185546875, "learning_rate": 8.471008453298998e-06, "loss": 134.8301, "step": 40860 }, { "epoch": 0.33808992017206435, "grad_norm": 729.060546875, "learning_rate": 8.469993160466254e-06, "loss": 94.2659, "step": 40870 }, { "epoch": 0.3381726434214336, "grad_norm": 643.9381713867188, "learning_rate": 8.46897759154208e-06, "loss": 88.6729, "step": 40880 }, { "epoch": 0.3382553666708028, "grad_norm": 927.8340454101562, "learning_rate": 8.467961746607279e-06, "loss": 127.7991, "step": 40890 }, { "epoch": 0.33833808992017206, "grad_norm": 604.8875122070312, "learning_rate": 8.466945625742678e-06, "loss": 76.509, "step": 40900 }, { "epoch": 0.3384208131695413, "grad_norm": 672.4412841796875, "learning_rate": 8.465929229029124e-06, "loss": 165.3088, "step": 40910 }, { "epoch": 0.3385035364189105, "grad_norm": 649.953369140625, "learning_rate": 8.464912556547486e-06, "loss": 112.9105, "step": 40920 }, { "epoch": 0.33858625966827977, "grad_norm": 1930.8785400390625, "learning_rate": 8.46389560837866e-06, "loss": 108.5322, "step": 40930 }, { "epoch": 0.338668982917649, "grad_norm": 622.740966796875, "learning_rate": 8.462878384603558e-06, "loss": 117.5824, "step": 40940 }, { "epoch": 0.3387517061670182, "grad_norm": 474.0765686035156, "learning_rate": 8.461860885303116e-06, "loss": 99.5456, "step": 40950 }, { "epoch": 0.3388344294163875, "grad_norm": 952.2894287109375, "learning_rate": 8.460843110558287e-06, "loss": 124.9169, "step": 40960 }, { "epoch": 0.3389171526657567, "grad_norm": 654.9668579101562, "learning_rate": 8.459825060450058e-06, "loss": 90.4174, "step": 40970 }, { "epoch": 0.3389998759151259, "grad_norm": 1069.80029296875, "learning_rate": 8.458806735059428e-06, "loss": 134.0334, "step": 40980 }, { "epoch": 0.3390825991644952, "grad_norm": 657.5958862304688, "learning_rate": 8.45778813446742e-06, "loss": 97.927, "step": 40990 }, { "epoch": 0.33916532241386443, "grad_norm": 1148.0079345703125, "learning_rate": 8.456769258755078e-06, "loss": 111.504, "step": 41000 }, { "epoch": 0.33924804566323363, "grad_norm": 1073.7718505859375, "learning_rate": 8.455750108003468e-06, "loss": 78.7796, "step": 41010 }, { "epoch": 0.3393307689126029, "grad_norm": 530.935302734375, "learning_rate": 8.454730682293686e-06, "loss": 76.4729, "step": 41020 }, { "epoch": 0.33941349216197214, "grad_norm": 584.3358764648438, "learning_rate": 8.453710981706838e-06, "loss": 100.3047, "step": 41030 }, { "epoch": 0.33949621541134134, "grad_norm": 1019.848388671875, "learning_rate": 8.452691006324055e-06, "loss": 101.324, "step": 41040 }, { "epoch": 0.3395789386607106, "grad_norm": 991.2681274414062, "learning_rate": 8.451670756226496e-06, "loss": 75.0817, "step": 41050 }, { "epoch": 0.33966166191007985, "grad_norm": 730.0400390625, "learning_rate": 8.450650231495336e-06, "loss": 85.305, "step": 41060 }, { "epoch": 0.33974438515944905, "grad_norm": 929.1596069335938, "learning_rate": 8.449629432211774e-06, "loss": 92.8536, "step": 41070 }, { "epoch": 0.3398271084088183, "grad_norm": 1377.75146484375, "learning_rate": 8.44860835845703e-06, "loss": 103.3764, "step": 41080 }, { "epoch": 0.33990983165818756, "grad_norm": 522.6357421875, "learning_rate": 8.447587010312343e-06, "loss": 107.57, "step": 41090 }, { "epoch": 0.33999255490755675, "grad_norm": 2292.6142578125, "learning_rate": 8.44656538785898e-06, "loss": 135.6061, "step": 41100 }, { "epoch": 0.340075278156926, "grad_norm": 419.8533630371094, "learning_rate": 8.44554349117823e-06, "loss": 80.6014, "step": 41110 }, { "epoch": 0.34015800140629526, "grad_norm": 1163.7928466796875, "learning_rate": 8.444521320351397e-06, "loss": 110.4075, "step": 41120 }, { "epoch": 0.34024072465566446, "grad_norm": 942.8880615234375, "learning_rate": 8.44349887545981e-06, "loss": 118.3985, "step": 41130 }, { "epoch": 0.3403234479050337, "grad_norm": 509.7681884765625, "learning_rate": 8.442476156584818e-06, "loss": 133.2833, "step": 41140 }, { "epoch": 0.34040617115440297, "grad_norm": 1343.72705078125, "learning_rate": 8.4414531638078e-06, "loss": 134.3201, "step": 41150 }, { "epoch": 0.34048889440377217, "grad_norm": 1334.9111328125, "learning_rate": 8.440429897210148e-06, "loss": 94.3114, "step": 41160 }, { "epoch": 0.3405716176531414, "grad_norm": 918.8824462890625, "learning_rate": 8.439406356873279e-06, "loss": 105.6756, "step": 41170 }, { "epoch": 0.3406543409025107, "grad_norm": 435.05010986328125, "learning_rate": 8.43838254287863e-06, "loss": 86.3829, "step": 41180 }, { "epoch": 0.3407370641518799, "grad_norm": 934.824462890625, "learning_rate": 8.43735845530766e-06, "loss": 122.4491, "step": 41190 }, { "epoch": 0.34081978740124913, "grad_norm": 1095.854248046875, "learning_rate": 8.436334094241855e-06, "loss": 110.3371, "step": 41200 }, { "epoch": 0.34090251065061833, "grad_norm": 1048.58154296875, "learning_rate": 8.435309459762718e-06, "loss": 135.8438, "step": 41210 }, { "epoch": 0.3409852338999876, "grad_norm": 722.0227661132812, "learning_rate": 8.434284551951772e-06, "loss": 86.2307, "step": 41220 }, { "epoch": 0.34106795714935684, "grad_norm": 835.3677368164062, "learning_rate": 8.433259370890565e-06, "loss": 79.5151, "step": 41230 }, { "epoch": 0.34115068039872604, "grad_norm": 1341.2940673828125, "learning_rate": 8.432233916660669e-06, "loss": 102.6455, "step": 41240 }, { "epoch": 0.3412334036480953, "grad_norm": 972.4404907226562, "learning_rate": 8.43120818934367e-06, "loss": 105.9913, "step": 41250 }, { "epoch": 0.34131612689746454, "grad_norm": 752.3978881835938, "learning_rate": 8.43018218902118e-06, "loss": 97.6527, "step": 41260 }, { "epoch": 0.34139885014683374, "grad_norm": 744.5557250976562, "learning_rate": 8.429155915774839e-06, "loss": 98.6538, "step": 41270 }, { "epoch": 0.341481573396203, "grad_norm": 1084.3350830078125, "learning_rate": 8.428129369686299e-06, "loss": 96.4803, "step": 41280 }, { "epoch": 0.34156429664557225, "grad_norm": 887.330078125, "learning_rate": 8.427102550837238e-06, "loss": 111.9868, "step": 41290 }, { "epoch": 0.34164701989494145, "grad_norm": 869.30908203125, "learning_rate": 8.426075459309356e-06, "loss": 120.3469, "step": 41300 }, { "epoch": 0.3417297431443107, "grad_norm": 2174.565673828125, "learning_rate": 8.42504809518437e-06, "loss": 115.0281, "step": 41310 }, { "epoch": 0.34181246639367996, "grad_norm": 939.2781372070312, "learning_rate": 8.42402045854403e-06, "loss": 118.3146, "step": 41320 }, { "epoch": 0.34189518964304916, "grad_norm": 962.0004272460938, "learning_rate": 8.422992549470094e-06, "loss": 111.6336, "step": 41330 }, { "epoch": 0.3419779128924184, "grad_norm": 856.4796142578125, "learning_rate": 8.42196436804435e-06, "loss": 89.8677, "step": 41340 }, { "epoch": 0.34206063614178767, "grad_norm": 882.47509765625, "learning_rate": 8.420935914348607e-06, "loss": 109.2613, "step": 41350 }, { "epoch": 0.34214335939115686, "grad_norm": 1152.5184326171875, "learning_rate": 8.419907188464691e-06, "loss": 83.5429, "step": 41360 }, { "epoch": 0.3422260826405261, "grad_norm": 1086.84521484375, "learning_rate": 8.418878190474459e-06, "loss": 107.8546, "step": 41370 }, { "epoch": 0.3423088058898954, "grad_norm": 1217.004150390625, "learning_rate": 8.417848920459778e-06, "loss": 137.2482, "step": 41380 }, { "epoch": 0.34239152913926457, "grad_norm": 706.2305297851562, "learning_rate": 8.416819378502543e-06, "loss": 74.3434, "step": 41390 }, { "epoch": 0.3424742523886338, "grad_norm": 1614.1566162109375, "learning_rate": 8.415789564684673e-06, "loss": 142.1887, "step": 41400 }, { "epoch": 0.3425569756380031, "grad_norm": 875.3898315429688, "learning_rate": 8.414759479088102e-06, "loss": 97.1488, "step": 41410 }, { "epoch": 0.3426396988873723, "grad_norm": 1026.1566162109375, "learning_rate": 8.413729121794794e-06, "loss": 130.0628, "step": 41420 }, { "epoch": 0.34272242213674153, "grad_norm": 977.263427734375, "learning_rate": 8.412698492886723e-06, "loss": 78.8849, "step": 41430 }, { "epoch": 0.3428051453861108, "grad_norm": 1244.2081298828125, "learning_rate": 8.411667592445898e-06, "loss": 142.179, "step": 41440 }, { "epoch": 0.34288786863548, "grad_norm": 751.5814819335938, "learning_rate": 8.410636420554337e-06, "loss": 95.5533, "step": 41450 }, { "epoch": 0.34297059188484924, "grad_norm": 786.3610229492188, "learning_rate": 8.409604977294093e-06, "loss": 103.3417, "step": 41460 }, { "epoch": 0.3430533151342185, "grad_norm": 931.486572265625, "learning_rate": 8.408573262747225e-06, "loss": 141.6174, "step": 41470 }, { "epoch": 0.3431360383835877, "grad_norm": 838.9139404296875, "learning_rate": 8.407541276995828e-06, "loss": 105.8545, "step": 41480 }, { "epoch": 0.34321876163295695, "grad_norm": 654.8321533203125, "learning_rate": 8.40650902012201e-06, "loss": 107.5087, "step": 41490 }, { "epoch": 0.3433014848823262, "grad_norm": 766.4340209960938, "learning_rate": 8.405476492207902e-06, "loss": 113.7733, "step": 41500 }, { "epoch": 0.3433842081316954, "grad_norm": 1260.985595703125, "learning_rate": 8.404443693335658e-06, "loss": 108.2389, "step": 41510 }, { "epoch": 0.34346693138106466, "grad_norm": 777.0344848632812, "learning_rate": 8.403410623587454e-06, "loss": 112.1793, "step": 41520 }, { "epoch": 0.3435496546304339, "grad_norm": 836.8817749023438, "learning_rate": 8.402377283045487e-06, "loss": 121.241, "step": 41530 }, { "epoch": 0.3436323778798031, "grad_norm": 675.0514526367188, "learning_rate": 8.401343671791974e-06, "loss": 121.9953, "step": 41540 }, { "epoch": 0.34371510112917236, "grad_norm": 1098.513916015625, "learning_rate": 8.400309789909155e-06, "loss": 129.7842, "step": 41550 }, { "epoch": 0.3437978243785416, "grad_norm": 728.864013671875, "learning_rate": 8.399275637479291e-06, "loss": 99.4059, "step": 41560 }, { "epoch": 0.3438805476279108, "grad_norm": 744.5280151367188, "learning_rate": 8.398241214584666e-06, "loss": 115.1609, "step": 41570 }, { "epoch": 0.34396327087728007, "grad_norm": 938.07080078125, "learning_rate": 8.397206521307584e-06, "loss": 102.9427, "step": 41580 }, { "epoch": 0.34404599412664927, "grad_norm": 1658.468994140625, "learning_rate": 8.396171557730369e-06, "loss": 115.574, "step": 41590 }, { "epoch": 0.3441287173760185, "grad_norm": 634.4130859375, "learning_rate": 8.39513632393537e-06, "loss": 93.7718, "step": 41600 }, { "epoch": 0.3442114406253878, "grad_norm": 579.6549072265625, "learning_rate": 8.394100820004954e-06, "loss": 92.9008, "step": 41610 }, { "epoch": 0.344294163874757, "grad_norm": 450.8374938964844, "learning_rate": 8.393065046021513e-06, "loss": 90.1996, "step": 41620 }, { "epoch": 0.34437688712412623, "grad_norm": 572.2847290039062, "learning_rate": 8.39202900206746e-06, "loss": 147.2724, "step": 41630 }, { "epoch": 0.3444596103734955, "grad_norm": 2148.787353515625, "learning_rate": 8.390992688225226e-06, "loss": 132.4656, "step": 41640 }, { "epoch": 0.3445423336228647, "grad_norm": 707.6249389648438, "learning_rate": 8.389956104577265e-06, "loss": 90.7987, "step": 41650 }, { "epoch": 0.34462505687223394, "grad_norm": 1235.2393798828125, "learning_rate": 8.388919251206054e-06, "loss": 120.5997, "step": 41660 }, { "epoch": 0.3447077801216032, "grad_norm": 802.1920166015625, "learning_rate": 8.387882128194094e-06, "loss": 127.8519, "step": 41670 }, { "epoch": 0.3447905033709724, "grad_norm": 1075.386962890625, "learning_rate": 8.3868447356239e-06, "loss": 95.1565, "step": 41680 }, { "epoch": 0.34487322662034164, "grad_norm": 1524.40283203125, "learning_rate": 8.385807073578014e-06, "loss": 125.0543, "step": 41690 }, { "epoch": 0.3449559498697109, "grad_norm": 887.3607177734375, "learning_rate": 8.384769142138998e-06, "loss": 86.6021, "step": 41700 }, { "epoch": 0.3450386731190801, "grad_norm": 932.5927124023438, "learning_rate": 8.383730941389434e-06, "loss": 96.8807, "step": 41710 }, { "epoch": 0.34512139636844935, "grad_norm": 956.2900390625, "learning_rate": 8.382692471411931e-06, "loss": 97.0625, "step": 41720 }, { "epoch": 0.3452041196178186, "grad_norm": 646.645263671875, "learning_rate": 8.38165373228911e-06, "loss": 95.7504, "step": 41730 }, { "epoch": 0.3452868428671878, "grad_norm": 1114.9278564453125, "learning_rate": 8.380614724103622e-06, "loss": 120.9078, "step": 41740 }, { "epoch": 0.34536956611655706, "grad_norm": 693.3104248046875, "learning_rate": 8.379575446938136e-06, "loss": 106.318, "step": 41750 }, { "epoch": 0.3454522893659263, "grad_norm": 8694.8857421875, "learning_rate": 8.37853590087534e-06, "loss": 133.577, "step": 41760 }, { "epoch": 0.3455350126152955, "grad_norm": 645.8015747070312, "learning_rate": 8.377496085997949e-06, "loss": 124.9107, "step": 41770 }, { "epoch": 0.34561773586466477, "grad_norm": 2443.57421875, "learning_rate": 8.376456002388695e-06, "loss": 115.6421, "step": 41780 }, { "epoch": 0.345700459114034, "grad_norm": 1185.3404541015625, "learning_rate": 8.375415650130332e-06, "loss": 103.3099, "step": 41790 }, { "epoch": 0.3457831823634032, "grad_norm": 1205.232666015625, "learning_rate": 8.37437502930564e-06, "loss": 100.6235, "step": 41800 }, { "epoch": 0.3458659056127725, "grad_norm": 1242.7322998046875, "learning_rate": 8.373334139997409e-06, "loss": 131.52, "step": 41810 }, { "epoch": 0.3459486288621417, "grad_norm": 747.3458862304688, "learning_rate": 8.372292982288463e-06, "loss": 118.5125, "step": 41820 }, { "epoch": 0.3460313521115109, "grad_norm": 1096.74462890625, "learning_rate": 8.371251556261642e-06, "loss": 136.9112, "step": 41830 }, { "epoch": 0.3461140753608802, "grad_norm": 801.9876708984375, "learning_rate": 8.370209861999807e-06, "loss": 106.0218, "step": 41840 }, { "epoch": 0.34619679861024943, "grad_norm": 909.0194091796875, "learning_rate": 8.36916789958584e-06, "loss": 103.4507, "step": 41850 }, { "epoch": 0.34627952185961863, "grad_norm": 1055.92041015625, "learning_rate": 8.368125669102645e-06, "loss": 94.5659, "step": 41860 }, { "epoch": 0.3463622451089879, "grad_norm": 867.0262451171875, "learning_rate": 8.36708317063315e-06, "loss": 135.4051, "step": 41870 }, { "epoch": 0.34644496835835714, "grad_norm": 871.2109985351562, "learning_rate": 8.366040404260298e-06, "loss": 99.1085, "step": 41880 }, { "epoch": 0.34652769160772634, "grad_norm": 830.19384765625, "learning_rate": 8.36499737006706e-06, "loss": 76.3817, "step": 41890 }, { "epoch": 0.3466104148570956, "grad_norm": 951.84326171875, "learning_rate": 8.363954068136424e-06, "loss": 119.5681, "step": 41900 }, { "epoch": 0.34669313810646485, "grad_norm": 731.0087890625, "learning_rate": 8.362910498551402e-06, "loss": 114.4868, "step": 41910 }, { "epoch": 0.34677586135583405, "grad_norm": 635.1656494140625, "learning_rate": 8.361866661395024e-06, "loss": 116.8612, "step": 41920 }, { "epoch": 0.3468585846052033, "grad_norm": 1068.5445556640625, "learning_rate": 8.360822556750345e-06, "loss": 91.3164, "step": 41930 }, { "epoch": 0.3469413078545725, "grad_norm": 949.19873046875, "learning_rate": 8.35977818470044e-06, "loss": 104.9618, "step": 41940 }, { "epoch": 0.34702403110394175, "grad_norm": 429.62677001953125, "learning_rate": 8.358733545328404e-06, "loss": 93.7747, "step": 41950 }, { "epoch": 0.347106754353311, "grad_norm": 893.44189453125, "learning_rate": 8.357688638717354e-06, "loss": 106.5521, "step": 41960 }, { "epoch": 0.3471894776026802, "grad_norm": 585.1188354492188, "learning_rate": 8.356643464950428e-06, "loss": 80.4151, "step": 41970 }, { "epoch": 0.34727220085204946, "grad_norm": 1215.0146484375, "learning_rate": 8.355598024110789e-06, "loss": 181.5081, "step": 41980 }, { "epoch": 0.3473549241014187, "grad_norm": 1458.9388427734375, "learning_rate": 8.354552316281613e-06, "loss": 141.7899, "step": 41990 }, { "epoch": 0.3474376473507879, "grad_norm": 915.29443359375, "learning_rate": 8.353506341546106e-06, "loss": 108.375, "step": 42000 }, { "epoch": 0.34752037060015717, "grad_norm": 1018.5140991210938, "learning_rate": 8.352460099987488e-06, "loss": 118.4601, "step": 42010 }, { "epoch": 0.3476030938495264, "grad_norm": 1653.6846923828125, "learning_rate": 8.351413591689007e-06, "loss": 127.7061, "step": 42020 }, { "epoch": 0.3476858170988956, "grad_norm": 1303.524169921875, "learning_rate": 8.350366816733927e-06, "loss": 109.428, "step": 42030 }, { "epoch": 0.3477685403482649, "grad_norm": 1039.2083740234375, "learning_rate": 8.349319775205536e-06, "loss": 120.8401, "step": 42040 }, { "epoch": 0.34785126359763413, "grad_norm": 1214.731689453125, "learning_rate": 8.34827246718714e-06, "loss": 157.5093, "step": 42050 }, { "epoch": 0.34793398684700333, "grad_norm": 832.408203125, "learning_rate": 8.347224892762072e-06, "loss": 106.25, "step": 42060 }, { "epoch": 0.3480167100963726, "grad_norm": 1899.1883544921875, "learning_rate": 8.346177052013681e-06, "loss": 128.2392, "step": 42070 }, { "epoch": 0.34809943334574184, "grad_norm": 475.4049072265625, "learning_rate": 8.345128945025338e-06, "loss": 128.0041, "step": 42080 }, { "epoch": 0.34818215659511104, "grad_norm": 483.9809875488281, "learning_rate": 8.344080571880438e-06, "loss": 92.6426, "step": 42090 }, { "epoch": 0.3482648798444803, "grad_norm": 1139.35302734375, "learning_rate": 8.343031932662394e-06, "loss": 89.6336, "step": 42100 }, { "epoch": 0.34834760309384954, "grad_norm": 998.1423950195312, "learning_rate": 8.341983027454641e-06, "loss": 148.1835, "step": 42110 }, { "epoch": 0.34843032634321874, "grad_norm": 1257.747802734375, "learning_rate": 8.340933856340637e-06, "loss": 116.7931, "step": 42120 }, { "epoch": 0.348513049592588, "grad_norm": 1050.6527099609375, "learning_rate": 8.339884419403857e-06, "loss": 124.4426, "step": 42130 }, { "epoch": 0.34859577284195725, "grad_norm": 935.9532470703125, "learning_rate": 8.338834716727801e-06, "loss": 100.9498, "step": 42140 }, { "epoch": 0.34867849609132645, "grad_norm": 1319.49365234375, "learning_rate": 8.337784748395992e-06, "loss": 89.4219, "step": 42150 }, { "epoch": 0.3487612193406957, "grad_norm": 583.6215209960938, "learning_rate": 8.336734514491968e-06, "loss": 101.7242, "step": 42160 }, { "epoch": 0.34884394259006496, "grad_norm": 1093.5703125, "learning_rate": 8.335684015099294e-06, "loss": 100.3031, "step": 42170 }, { "epoch": 0.34892666583943416, "grad_norm": 7167.025390625, "learning_rate": 8.33463325030155e-06, "loss": 142.965, "step": 42180 }, { "epoch": 0.3490093890888034, "grad_norm": 1053.3721923828125, "learning_rate": 8.333582220182344e-06, "loss": 118.1564, "step": 42190 }, { "epoch": 0.34909211233817267, "grad_norm": 1960.47705078125, "learning_rate": 8.332530924825297e-06, "loss": 119.301, "step": 42200 }, { "epoch": 0.34917483558754187, "grad_norm": 973.1529541015625, "learning_rate": 8.33147936431406e-06, "loss": 131.6417, "step": 42210 }, { "epoch": 0.3492575588369111, "grad_norm": 925.6248168945312, "learning_rate": 8.3304275387323e-06, "loss": 116.9732, "step": 42220 }, { "epoch": 0.3493402820862804, "grad_norm": 0.0, "learning_rate": 8.329375448163703e-06, "loss": 87.3547, "step": 42230 }, { "epoch": 0.3494230053356496, "grad_norm": 1005.3858032226562, "learning_rate": 8.328323092691985e-06, "loss": 104.9806, "step": 42240 }, { "epoch": 0.3495057285850188, "grad_norm": 790.18603515625, "learning_rate": 8.32727047240087e-06, "loss": 141.6189, "step": 42250 }, { "epoch": 0.3495884518343881, "grad_norm": 885.5327758789062, "learning_rate": 8.326217587374115e-06, "loss": 95.2874, "step": 42260 }, { "epoch": 0.3496711750837573, "grad_norm": 1333.3345947265625, "learning_rate": 8.325164437695493e-06, "loss": 110.1591, "step": 42270 }, { "epoch": 0.34975389833312653, "grad_norm": 444.2149963378906, "learning_rate": 8.324111023448795e-06, "loss": 89.4089, "step": 42280 }, { "epoch": 0.3498366215824958, "grad_norm": 712.4427490234375, "learning_rate": 8.32305734471784e-06, "loss": 104.8016, "step": 42290 }, { "epoch": 0.349919344831865, "grad_norm": 825.5255737304688, "learning_rate": 8.322003401586463e-06, "loss": 109.8285, "step": 42300 }, { "epoch": 0.35000206808123424, "grad_norm": 674.3945922851562, "learning_rate": 8.32094919413852e-06, "loss": 118.0758, "step": 42310 }, { "epoch": 0.35008479133060344, "grad_norm": 1092.6353759765625, "learning_rate": 8.319894722457892e-06, "loss": 100.6579, "step": 42320 }, { "epoch": 0.3501675145799727, "grad_norm": 791.6581420898438, "learning_rate": 8.318839986628477e-06, "loss": 96.1517, "step": 42330 }, { "epoch": 0.35025023782934195, "grad_norm": 483.9101867675781, "learning_rate": 8.317784986734194e-06, "loss": 86.3806, "step": 42340 }, { "epoch": 0.35033296107871115, "grad_norm": 982.6383056640625, "learning_rate": 8.316729722858987e-06, "loss": 142.3889, "step": 42350 }, { "epoch": 0.3504156843280804, "grad_norm": 1031.0849609375, "learning_rate": 8.31567419508682e-06, "loss": 89.9149, "step": 42360 }, { "epoch": 0.35049840757744966, "grad_norm": 1122.879150390625, "learning_rate": 8.31461840350167e-06, "loss": 103.2544, "step": 42370 }, { "epoch": 0.35058113082681885, "grad_norm": 1027.361328125, "learning_rate": 8.313562348187549e-06, "loss": 90.0591, "step": 42380 }, { "epoch": 0.3506638540761881, "grad_norm": 789.521240234375, "learning_rate": 8.312506029228478e-06, "loss": 90.2937, "step": 42390 }, { "epoch": 0.35074657732555736, "grad_norm": 616.3732299804688, "learning_rate": 8.311449446708506e-06, "loss": 129.6206, "step": 42400 }, { "epoch": 0.35082930057492656, "grad_norm": 1325.065185546875, "learning_rate": 8.310392600711698e-06, "loss": 114.9959, "step": 42410 }, { "epoch": 0.3509120238242958, "grad_norm": 976.2049560546875, "learning_rate": 8.309335491322143e-06, "loss": 95.8001, "step": 42420 }, { "epoch": 0.35099474707366507, "grad_norm": 1060.470947265625, "learning_rate": 8.30827811862395e-06, "loss": 101.8484, "step": 42430 }, { "epoch": 0.35107747032303427, "grad_norm": 813.407470703125, "learning_rate": 8.307220482701251e-06, "loss": 119.0382, "step": 42440 }, { "epoch": 0.3511601935724035, "grad_norm": 769.6190795898438, "learning_rate": 8.306162583638197e-06, "loss": 116.1655, "step": 42450 }, { "epoch": 0.3512429168217728, "grad_norm": 1182.1982421875, "learning_rate": 8.305104421518959e-06, "loss": 116.6159, "step": 42460 }, { "epoch": 0.351325640071142, "grad_norm": 1151.38671875, "learning_rate": 8.30404599642773e-06, "loss": 97.3802, "step": 42470 }, { "epoch": 0.35140836332051123, "grad_norm": 0.0, "learning_rate": 8.302987308448724e-06, "loss": 81.0704, "step": 42480 }, { "epoch": 0.3514910865698805, "grad_norm": 728.1912231445312, "learning_rate": 8.301928357666178e-06, "loss": 92.2258, "step": 42490 }, { "epoch": 0.3515738098192497, "grad_norm": 793.99267578125, "learning_rate": 8.300869144164346e-06, "loss": 110.2738, "step": 42500 }, { "epoch": 0.35165653306861894, "grad_norm": 993.7640991210938, "learning_rate": 8.299809668027505e-06, "loss": 131.7156, "step": 42510 }, { "epoch": 0.3517392563179882, "grad_norm": 606.397705078125, "learning_rate": 8.298749929339953e-06, "loss": 104.0031, "step": 42520 }, { "epoch": 0.3518219795673574, "grad_norm": 827.4598388671875, "learning_rate": 8.297689928186009e-06, "loss": 110.1917, "step": 42530 }, { "epoch": 0.35190470281672664, "grad_norm": 2409.9150390625, "learning_rate": 8.29662966465001e-06, "loss": 135.4566, "step": 42540 }, { "epoch": 0.3519874260660959, "grad_norm": 613.3368530273438, "learning_rate": 8.295569138816319e-06, "loss": 80.187, "step": 42550 }, { "epoch": 0.3520701493154651, "grad_norm": 843.9635009765625, "learning_rate": 8.294508350769315e-06, "loss": 109.0294, "step": 42560 }, { "epoch": 0.35215287256483435, "grad_norm": 622.92578125, "learning_rate": 8.293447300593402e-06, "loss": 127.0855, "step": 42570 }, { "epoch": 0.3522355958142036, "grad_norm": 657.6416625976562, "learning_rate": 8.292385988373005e-06, "loss": 108.607, "step": 42580 }, { "epoch": 0.3523183190635728, "grad_norm": 1057.9915771484375, "learning_rate": 8.29132441419256e-06, "loss": 117.3586, "step": 42590 }, { "epoch": 0.35240104231294206, "grad_norm": 704.32421875, "learning_rate": 8.290262578136541e-06, "loss": 92.2817, "step": 42600 }, { "epoch": 0.3524837655623113, "grad_norm": 391.53265380859375, "learning_rate": 8.289200480289426e-06, "loss": 91.4089, "step": 42610 }, { "epoch": 0.3525664888116805, "grad_norm": 1248.0340576171875, "learning_rate": 8.288138120735726e-06, "loss": 94.6713, "step": 42620 }, { "epoch": 0.35264921206104977, "grad_norm": 1576.25, "learning_rate": 8.287075499559965e-06, "loss": 120.6687, "step": 42630 }, { "epoch": 0.352731935310419, "grad_norm": 850.7510375976562, "learning_rate": 8.286012616846693e-06, "loss": 68.9104, "step": 42640 }, { "epoch": 0.3528146585597882, "grad_norm": 1018.4036254882812, "learning_rate": 8.284949472680477e-06, "loss": 96.3005, "step": 42650 }, { "epoch": 0.3528973818091575, "grad_norm": 769.276611328125, "learning_rate": 8.283886067145908e-06, "loss": 101.5941, "step": 42660 }, { "epoch": 0.35298010505852667, "grad_norm": 1009.304931640625, "learning_rate": 8.282822400327595e-06, "loss": 100.8695, "step": 42670 }, { "epoch": 0.3530628283078959, "grad_norm": 965.6119384765625, "learning_rate": 8.28175847231017e-06, "loss": 112.0248, "step": 42680 }, { "epoch": 0.3531455515572652, "grad_norm": 862.6695556640625, "learning_rate": 8.280694283178285e-06, "loss": 97.2944, "step": 42690 }, { "epoch": 0.3532282748066344, "grad_norm": 698.0172119140625, "learning_rate": 8.27962983301661e-06, "loss": 66.0908, "step": 42700 }, { "epoch": 0.35331099805600363, "grad_norm": 680.3596801757812, "learning_rate": 8.278565121909845e-06, "loss": 56.0012, "step": 42710 }, { "epoch": 0.3533937213053729, "grad_norm": 979.0956420898438, "learning_rate": 8.277500149942697e-06, "loss": 111.6114, "step": 42720 }, { "epoch": 0.3534764445547421, "grad_norm": 839.169921875, "learning_rate": 8.276434917199904e-06, "loss": 126.5481, "step": 42730 }, { "epoch": 0.35355916780411134, "grad_norm": 1183.0323486328125, "learning_rate": 8.275369423766222e-06, "loss": 97.7488, "step": 42740 }, { "epoch": 0.3536418910534806, "grad_norm": 1349.1572265625, "learning_rate": 8.274303669726427e-06, "loss": 110.0713, "step": 42750 }, { "epoch": 0.3537246143028498, "grad_norm": 916.60107421875, "learning_rate": 8.273237655165314e-06, "loss": 106.8208, "step": 42760 }, { "epoch": 0.35380733755221905, "grad_norm": 800.146484375, "learning_rate": 8.272171380167705e-06, "loss": 83.875, "step": 42770 }, { "epoch": 0.3538900608015883, "grad_norm": 1539.4642333984375, "learning_rate": 8.271104844818436e-06, "loss": 130.0894, "step": 42780 }, { "epoch": 0.3539727840509575, "grad_norm": 1235.24560546875, "learning_rate": 8.270038049202366e-06, "loss": 131.9467, "step": 42790 }, { "epoch": 0.35405550730032675, "grad_norm": 962.0623168945312, "learning_rate": 8.268970993404377e-06, "loss": 100.8265, "step": 42800 }, { "epoch": 0.354138230549696, "grad_norm": 836.7759399414062, "learning_rate": 8.267903677509368e-06, "loss": 119.7814, "step": 42810 }, { "epoch": 0.3542209537990652, "grad_norm": 1205.632080078125, "learning_rate": 8.266836101602263e-06, "loss": 107.5889, "step": 42820 }, { "epoch": 0.35430367704843446, "grad_norm": 1180.9609375, "learning_rate": 8.265768265767999e-06, "loss": 146.9007, "step": 42830 }, { "epoch": 0.3543864002978037, "grad_norm": 784.8786010742188, "learning_rate": 8.264700170091543e-06, "loss": 93.1217, "step": 42840 }, { "epoch": 0.3544691235471729, "grad_norm": 583.6641845703125, "learning_rate": 8.263631814657879e-06, "loss": 127.952, "step": 42850 }, { "epoch": 0.35455184679654217, "grad_norm": 1015.6278076171875, "learning_rate": 8.262563199552007e-06, "loss": 109.5995, "step": 42860 }, { "epoch": 0.3546345700459114, "grad_norm": 1008.2879638671875, "learning_rate": 8.261494324858956e-06, "loss": 83.4442, "step": 42870 }, { "epoch": 0.3547172932952806, "grad_norm": 934.4283447265625, "learning_rate": 8.26042519066377e-06, "loss": 81.4526, "step": 42880 }, { "epoch": 0.3548000165446499, "grad_norm": 602.0402221679688, "learning_rate": 8.259355797051515e-06, "loss": 87.8508, "step": 42890 }, { "epoch": 0.35488273979401913, "grad_norm": 952.8324584960938, "learning_rate": 8.258286144107277e-06, "loss": 101.1536, "step": 42900 }, { "epoch": 0.35496546304338833, "grad_norm": 924.0335693359375, "learning_rate": 8.257216231916162e-06, "loss": 113.0049, "step": 42910 }, { "epoch": 0.3550481862927576, "grad_norm": 684.9584350585938, "learning_rate": 8.256146060563304e-06, "loss": 117.6641, "step": 42920 }, { "epoch": 0.35513090954212684, "grad_norm": 1416.010009765625, "learning_rate": 8.255075630133847e-06, "loss": 93.2686, "step": 42930 }, { "epoch": 0.35521363279149604, "grad_norm": 1231.5831298828125, "learning_rate": 8.254004940712958e-06, "loss": 111.2918, "step": 42940 }, { "epoch": 0.3552963560408653, "grad_norm": 1368.33935546875, "learning_rate": 8.252933992385833e-06, "loss": 101.9154, "step": 42950 }, { "epoch": 0.35537907929023455, "grad_norm": 754.49609375, "learning_rate": 8.251862785237676e-06, "loss": 88.8121, "step": 42960 }, { "epoch": 0.35546180253960374, "grad_norm": 1223.3607177734375, "learning_rate": 8.250791319353723e-06, "loss": 111.0358, "step": 42970 }, { "epoch": 0.355544525788973, "grad_norm": 1084.2374267578125, "learning_rate": 8.249719594819225e-06, "loss": 107.8028, "step": 42980 }, { "epoch": 0.35562724903834225, "grad_norm": 1002.8360595703125, "learning_rate": 8.248647611719452e-06, "loss": 87.2639, "step": 42990 }, { "epoch": 0.35570997228771145, "grad_norm": 882.518310546875, "learning_rate": 8.247575370139695e-06, "loss": 120.7826, "step": 43000 }, { "epoch": 0.3557926955370807, "grad_norm": 710.4674682617188, "learning_rate": 8.246502870165273e-06, "loss": 130.1348, "step": 43010 }, { "epoch": 0.3558754187864499, "grad_norm": 705.7244262695312, "learning_rate": 8.245430111881519e-06, "loss": 82.2953, "step": 43020 }, { "epoch": 0.35595814203581916, "grad_norm": 1012.916748046875, "learning_rate": 8.244357095373783e-06, "loss": 103.8642, "step": 43030 }, { "epoch": 0.3560408652851884, "grad_norm": 823.5924682617188, "learning_rate": 8.243283820727441e-06, "loss": 120.169, "step": 43040 }, { "epoch": 0.3561235885345576, "grad_norm": 506.7729187011719, "learning_rate": 8.242210288027893e-06, "loss": 106.6605, "step": 43050 }, { "epoch": 0.35620631178392687, "grad_norm": 573.303955078125, "learning_rate": 8.241136497360552e-06, "loss": 96.0248, "step": 43060 }, { "epoch": 0.3562890350332961, "grad_norm": 1349.73974609375, "learning_rate": 8.240062448810853e-06, "loss": 150.305, "step": 43070 }, { "epoch": 0.3563717582826653, "grad_norm": 898.83349609375, "learning_rate": 8.238988142464254e-06, "loss": 99.9782, "step": 43080 }, { "epoch": 0.3564544815320346, "grad_norm": 1892.2440185546875, "learning_rate": 8.237913578406236e-06, "loss": 127.4689, "step": 43090 }, { "epoch": 0.3565372047814038, "grad_norm": 1101.9765625, "learning_rate": 8.236838756722294e-06, "loss": 99.9242, "step": 43100 }, { "epoch": 0.356619928030773, "grad_norm": 785.5177001953125, "learning_rate": 8.235763677497945e-06, "loss": 102.3918, "step": 43110 }, { "epoch": 0.3567026512801423, "grad_norm": 1374.7742919921875, "learning_rate": 8.234688340818732e-06, "loss": 113.4264, "step": 43120 }, { "epoch": 0.35678537452951153, "grad_norm": 606.4083862304688, "learning_rate": 8.233612746770214e-06, "loss": 93.4949, "step": 43130 }, { "epoch": 0.35686809777888073, "grad_norm": 922.9923095703125, "learning_rate": 8.232536895437968e-06, "loss": 85.8221, "step": 43140 }, { "epoch": 0.35695082102825, "grad_norm": 790.5139770507812, "learning_rate": 8.231460786907597e-06, "loss": 117.3538, "step": 43150 }, { "epoch": 0.35703354427761924, "grad_norm": 417.8067626953125, "learning_rate": 8.230384421264722e-06, "loss": 87.6937, "step": 43160 }, { "epoch": 0.35711626752698844, "grad_norm": 1458.1962890625, "learning_rate": 8.229307798594985e-06, "loss": 133.923, "step": 43170 }, { "epoch": 0.3571989907763577, "grad_norm": 987.9276123046875, "learning_rate": 8.228230918984046e-06, "loss": 111.6961, "step": 43180 }, { "epoch": 0.35728171402572695, "grad_norm": 1103.388427734375, "learning_rate": 8.22715378251759e-06, "loss": 93.5427, "step": 43190 }, { "epoch": 0.35736443727509615, "grad_norm": 697.0567626953125, "learning_rate": 8.226076389281316e-06, "loss": 117.8876, "step": 43200 }, { "epoch": 0.3574471605244654, "grad_norm": 1062.5294189453125, "learning_rate": 8.22499873936095e-06, "loss": 85.2779, "step": 43210 }, { "epoch": 0.35752988377383466, "grad_norm": 997.5350341796875, "learning_rate": 8.223920832842236e-06, "loss": 127.3359, "step": 43220 }, { "epoch": 0.35761260702320385, "grad_norm": 839.9952392578125, "learning_rate": 8.222842669810936e-06, "loss": 112.6368, "step": 43230 }, { "epoch": 0.3576953302725731, "grad_norm": 1012.621337890625, "learning_rate": 8.221764250352835e-06, "loss": 108.84, "step": 43240 }, { "epoch": 0.35777805352194236, "grad_norm": 918.2972412109375, "learning_rate": 8.220685574553739e-06, "loss": 85.8282, "step": 43250 }, { "epoch": 0.35786077677131156, "grad_norm": 776.9376220703125, "learning_rate": 8.219606642499474e-06, "loss": 96.5936, "step": 43260 }, { "epoch": 0.3579435000206808, "grad_norm": 868.3795166015625, "learning_rate": 8.218527454275884e-06, "loss": 91.7565, "step": 43270 }, { "epoch": 0.35802622327005007, "grad_norm": 958.1405029296875, "learning_rate": 8.217448009968834e-06, "loss": 110.3028, "step": 43280 }, { "epoch": 0.35810894651941927, "grad_norm": 1635.0120849609375, "learning_rate": 8.216368309664213e-06, "loss": 115.6983, "step": 43290 }, { "epoch": 0.3581916697687885, "grad_norm": 495.8813781738281, "learning_rate": 8.215288353447927e-06, "loss": 125.6738, "step": 43300 }, { "epoch": 0.3582743930181578, "grad_norm": 1934.987548828125, "learning_rate": 8.214208141405903e-06, "loss": 96.0109, "step": 43310 }, { "epoch": 0.358357116267527, "grad_norm": 1259.3150634765625, "learning_rate": 8.213127673624088e-06, "loss": 98.7009, "step": 43320 }, { "epoch": 0.35843983951689623, "grad_norm": 985.3812255859375, "learning_rate": 8.212046950188451e-06, "loss": 119.01, "step": 43330 }, { "epoch": 0.3585225627662655, "grad_norm": 657.3631591796875, "learning_rate": 8.21096597118498e-06, "loss": 102.065, "step": 43340 }, { "epoch": 0.3586052860156347, "grad_norm": 808.63232421875, "learning_rate": 8.209884736699681e-06, "loss": 86.2247, "step": 43350 }, { "epoch": 0.35868800926500394, "grad_norm": 963.5828857421875, "learning_rate": 8.208803246818586e-06, "loss": 99.5541, "step": 43360 }, { "epoch": 0.3587707325143732, "grad_norm": 356.1247253417969, "learning_rate": 8.207721501627743e-06, "loss": 132.9291, "step": 43370 }, { "epoch": 0.3588534557637424, "grad_norm": 1359.7357177734375, "learning_rate": 8.20663950121322e-06, "loss": 106.6871, "step": 43380 }, { "epoch": 0.35893617901311164, "grad_norm": 741.9186401367188, "learning_rate": 8.20555724566111e-06, "loss": 110.5265, "step": 43390 }, { "epoch": 0.35901890226248084, "grad_norm": 804.3803100585938, "learning_rate": 8.204474735057522e-06, "loss": 89.7678, "step": 43400 }, { "epoch": 0.3591016255118501, "grad_norm": 986.8497924804688, "learning_rate": 8.203391969488586e-06, "loss": 76.5805, "step": 43410 }, { "epoch": 0.35918434876121935, "grad_norm": 981.0845947265625, "learning_rate": 8.20230894904045e-06, "loss": 108.0519, "step": 43420 }, { "epoch": 0.35926707201058855, "grad_norm": 967.68310546875, "learning_rate": 8.20122567379929e-06, "loss": 144.2405, "step": 43430 }, { "epoch": 0.3593497952599578, "grad_norm": 1253.8336181640625, "learning_rate": 8.200142143851295e-06, "loss": 85.0357, "step": 43440 }, { "epoch": 0.35943251850932706, "grad_norm": 660.7843627929688, "learning_rate": 8.199058359282675e-06, "loss": 110.5242, "step": 43450 }, { "epoch": 0.35951524175869626, "grad_norm": 876.0247802734375, "learning_rate": 8.197974320179664e-06, "loss": 143.5727, "step": 43460 }, { "epoch": 0.3595979650080655, "grad_norm": 1184.567626953125, "learning_rate": 8.19689002662851e-06, "loss": 87.2803, "step": 43470 }, { "epoch": 0.35968068825743477, "grad_norm": 899.4736938476562, "learning_rate": 8.195805478715492e-06, "loss": 94.5841, "step": 43480 }, { "epoch": 0.35976341150680397, "grad_norm": 1176.9891357421875, "learning_rate": 8.194720676526898e-06, "loss": 105.5688, "step": 43490 }, { "epoch": 0.3598461347561732, "grad_norm": 1430.493896484375, "learning_rate": 8.193635620149041e-06, "loss": 120.161, "step": 43500 }, { "epoch": 0.3599288580055425, "grad_norm": 1398.2462158203125, "learning_rate": 8.192550309668254e-06, "loss": 153.4543, "step": 43510 }, { "epoch": 0.3600115812549117, "grad_norm": 978.09033203125, "learning_rate": 8.191464745170892e-06, "loss": 97.1732, "step": 43520 }, { "epoch": 0.3600943045042809, "grad_norm": 1013.3282470703125, "learning_rate": 8.190378926743327e-06, "loss": 101.2923, "step": 43530 }, { "epoch": 0.3601770277536502, "grad_norm": 881.2088012695312, "learning_rate": 8.189292854471953e-06, "loss": 148.854, "step": 43540 }, { "epoch": 0.3602597510030194, "grad_norm": 870.5662841796875, "learning_rate": 8.188206528443182e-06, "loss": 92.8082, "step": 43550 }, { "epoch": 0.36034247425238863, "grad_norm": 776.38916015625, "learning_rate": 8.18711994874345e-06, "loss": 82.2745, "step": 43560 }, { "epoch": 0.3604251975017579, "grad_norm": 904.0106811523438, "learning_rate": 8.186033115459211e-06, "loss": 97.3916, "step": 43570 }, { "epoch": 0.3605079207511271, "grad_norm": 1215.83349609375, "learning_rate": 8.184946028676937e-06, "loss": 106.3127, "step": 43580 }, { "epoch": 0.36059064400049634, "grad_norm": 794.0036010742188, "learning_rate": 8.183858688483126e-06, "loss": 91.0681, "step": 43590 }, { "epoch": 0.3606733672498656, "grad_norm": 1268.9676513671875, "learning_rate": 8.182771094964292e-06, "loss": 123.4264, "step": 43600 }, { "epoch": 0.3607560904992348, "grad_norm": 777.9105224609375, "learning_rate": 8.181683248206968e-06, "loss": 111.6841, "step": 43610 }, { "epoch": 0.36083881374860405, "grad_norm": 854.5106201171875, "learning_rate": 8.180595148297709e-06, "loss": 113.4441, "step": 43620 }, { "epoch": 0.3609215369979733, "grad_norm": 850.8244018554688, "learning_rate": 8.179506795323092e-06, "loss": 135.2171, "step": 43630 }, { "epoch": 0.3610042602473425, "grad_norm": 1269.437255859375, "learning_rate": 8.17841818936971e-06, "loss": 155.0749, "step": 43640 }, { "epoch": 0.36108698349671176, "grad_norm": 1103.447509765625, "learning_rate": 8.177329330524182e-06, "loss": 85.1156, "step": 43650 }, { "epoch": 0.361169706746081, "grad_norm": 800.2313232421875, "learning_rate": 8.17624021887314e-06, "loss": 103.1669, "step": 43660 }, { "epoch": 0.3612524299954502, "grad_norm": 662.4274291992188, "learning_rate": 8.17515085450324e-06, "loss": 98.6518, "step": 43670 }, { "epoch": 0.36133515324481946, "grad_norm": 975.4820556640625, "learning_rate": 8.174061237501159e-06, "loss": 120.7466, "step": 43680 }, { "epoch": 0.3614178764941887, "grad_norm": 1076.44873046875, "learning_rate": 8.172971367953593e-06, "loss": 80.2128, "step": 43690 }, { "epoch": 0.3615005997435579, "grad_norm": 1007.4608764648438, "learning_rate": 8.171881245947257e-06, "loss": 62.0215, "step": 43700 }, { "epoch": 0.36158332299292717, "grad_norm": 1051.32177734375, "learning_rate": 8.170790871568887e-06, "loss": 157.7504, "step": 43710 }, { "epoch": 0.3616660462422964, "grad_norm": 1688.2108154296875, "learning_rate": 8.169700244905239e-06, "loss": 123.6984, "step": 43720 }, { "epoch": 0.3617487694916656, "grad_norm": 1116.3714599609375, "learning_rate": 8.168609366043089e-06, "loss": 92.2827, "step": 43730 }, { "epoch": 0.3618314927410349, "grad_norm": 845.093505859375, "learning_rate": 8.167518235069234e-06, "loss": 77.9922, "step": 43740 }, { "epoch": 0.3619142159904041, "grad_norm": 2042.611572265625, "learning_rate": 8.16642685207049e-06, "loss": 141.047, "step": 43750 }, { "epoch": 0.36199693923977333, "grad_norm": 602.3466186523438, "learning_rate": 8.165335217133695e-06, "loss": 122.7751, "step": 43760 }, { "epoch": 0.3620796624891426, "grad_norm": 998.31005859375, "learning_rate": 8.164243330345702e-06, "loss": 95.6849, "step": 43770 }, { "epoch": 0.3621623857385118, "grad_norm": 939.9814453125, "learning_rate": 8.16315119179339e-06, "loss": 88.2418, "step": 43780 }, { "epoch": 0.36224510898788104, "grad_norm": 2236.7392578125, "learning_rate": 8.162058801563652e-06, "loss": 116.7937, "step": 43790 }, { "epoch": 0.3623278322372503, "grad_norm": 973.8187255859375, "learning_rate": 8.160966159743411e-06, "loss": 94.5988, "step": 43800 }, { "epoch": 0.3624105554866195, "grad_norm": 640.3901977539062, "learning_rate": 8.159873266419598e-06, "loss": 103.685, "step": 43810 }, { "epoch": 0.36249327873598874, "grad_norm": 1432.554931640625, "learning_rate": 8.15878012167917e-06, "loss": 106.0658, "step": 43820 }, { "epoch": 0.362576001985358, "grad_norm": 859.1890258789062, "learning_rate": 8.157686725609105e-06, "loss": 87.8233, "step": 43830 }, { "epoch": 0.3626587252347272, "grad_norm": 1776.3751220703125, "learning_rate": 8.1565930782964e-06, "loss": 114.2181, "step": 43840 }, { "epoch": 0.36274144848409645, "grad_norm": 926.181884765625, "learning_rate": 8.155499179828068e-06, "loss": 114.7968, "step": 43850 }, { "epoch": 0.3628241717334657, "grad_norm": 959.3102416992188, "learning_rate": 8.15440503029115e-06, "loss": 105.2323, "step": 43860 }, { "epoch": 0.3629068949828349, "grad_norm": 1929.3040771484375, "learning_rate": 8.153310629772702e-06, "loss": 131.4064, "step": 43870 }, { "epoch": 0.36298961823220416, "grad_norm": 1120.273193359375, "learning_rate": 8.152215978359796e-06, "loss": 92.3281, "step": 43880 }, { "epoch": 0.3630723414815734, "grad_norm": 830.0736694335938, "learning_rate": 8.151121076139534e-06, "loss": 91.5073, "step": 43890 }, { "epoch": 0.3631550647309426, "grad_norm": 1354.7823486328125, "learning_rate": 8.150025923199027e-06, "loss": 201.7689, "step": 43900 }, { "epoch": 0.36323778798031187, "grad_norm": 1229.7574462890625, "learning_rate": 8.148930519625417e-06, "loss": 116.0604, "step": 43910 }, { "epoch": 0.3633205112296811, "grad_norm": 1157.707763671875, "learning_rate": 8.147834865505855e-06, "loss": 118.8252, "step": 43920 }, { "epoch": 0.3634032344790503, "grad_norm": 391.3431396484375, "learning_rate": 8.14673896092752e-06, "loss": 94.0042, "step": 43930 }, { "epoch": 0.3634859577284196, "grad_norm": 963.4109497070312, "learning_rate": 8.145642805977608e-06, "loss": 94.306, "step": 43940 }, { "epoch": 0.3635686809777888, "grad_norm": 680.21826171875, "learning_rate": 8.144546400743334e-06, "loss": 121.9921, "step": 43950 }, { "epoch": 0.363651404227158, "grad_norm": 763.9702758789062, "learning_rate": 8.143449745311934e-06, "loss": 105.5048, "step": 43960 }, { "epoch": 0.3637341274765273, "grad_norm": 706.5518188476562, "learning_rate": 8.142352839770663e-06, "loss": 112.0056, "step": 43970 }, { "epoch": 0.36381685072589653, "grad_norm": 1533.0765380859375, "learning_rate": 8.1412556842068e-06, "loss": 109.1279, "step": 43980 }, { "epoch": 0.36389957397526573, "grad_norm": 784.7587890625, "learning_rate": 8.140158278707637e-06, "loss": 121.243, "step": 43990 }, { "epoch": 0.363982297224635, "grad_norm": 886.728759765625, "learning_rate": 8.139060623360494e-06, "loss": 131.5882, "step": 44000 }, { "epoch": 0.36406502047400424, "grad_norm": 990.3309936523438, "learning_rate": 8.1379627182527e-06, "loss": 95.6152, "step": 44010 }, { "epoch": 0.36414774372337344, "grad_norm": 1236.8133544921875, "learning_rate": 8.136864563471617e-06, "loss": 104.638, "step": 44020 }, { "epoch": 0.3642304669727427, "grad_norm": 560.0664672851562, "learning_rate": 8.135766159104615e-06, "loss": 90.362, "step": 44030 }, { "epoch": 0.36431319022211195, "grad_norm": 796.653076171875, "learning_rate": 8.134667505239092e-06, "loss": 99.4829, "step": 44040 }, { "epoch": 0.36439591347148115, "grad_norm": 777.6304321289062, "learning_rate": 8.133568601962462e-06, "loss": 106.3731, "step": 44050 }, { "epoch": 0.3644786367208504, "grad_norm": 746.3777465820312, "learning_rate": 8.132469449362158e-06, "loss": 101.6638, "step": 44060 }, { "epoch": 0.36456135997021966, "grad_norm": 824.9530029296875, "learning_rate": 8.131370047525637e-06, "loss": 144.5076, "step": 44070 }, { "epoch": 0.36464408321958885, "grad_norm": 1749.48095703125, "learning_rate": 8.130270396540372e-06, "loss": 107.8925, "step": 44080 }, { "epoch": 0.3647268064689581, "grad_norm": 1590.762451171875, "learning_rate": 8.129170496493857e-06, "loss": 129.4328, "step": 44090 }, { "epoch": 0.36480952971832736, "grad_norm": 854.1180419921875, "learning_rate": 8.128070347473609e-06, "loss": 97.3397, "step": 44100 }, { "epoch": 0.36489225296769656, "grad_norm": 920.0262451171875, "learning_rate": 8.126969949567157e-06, "loss": 90.8626, "step": 44110 }, { "epoch": 0.3649749762170658, "grad_norm": 802.9644775390625, "learning_rate": 8.125869302862058e-06, "loss": 101.2598, "step": 44120 }, { "epoch": 0.365057699466435, "grad_norm": 721.7289428710938, "learning_rate": 8.124768407445883e-06, "loss": 91.293, "step": 44130 }, { "epoch": 0.36514042271580427, "grad_norm": 452.2812805175781, "learning_rate": 8.123667263406228e-06, "loss": 115.1237, "step": 44140 }, { "epoch": 0.3652231459651735, "grad_norm": 755.5316162109375, "learning_rate": 8.122565870830704e-06, "loss": 95.5803, "step": 44150 }, { "epoch": 0.3653058692145427, "grad_norm": 583.1863403320312, "learning_rate": 8.121464229806944e-06, "loss": 91.2347, "step": 44160 }, { "epoch": 0.365388592463912, "grad_norm": 894.2572631835938, "learning_rate": 8.120362340422601e-06, "loss": 101.122, "step": 44170 }, { "epoch": 0.36547131571328123, "grad_norm": 1000.3782958984375, "learning_rate": 8.119260202765347e-06, "loss": 104.094, "step": 44180 }, { "epoch": 0.36555403896265043, "grad_norm": 1205.3262939453125, "learning_rate": 8.118157816922874e-06, "loss": 107.1193, "step": 44190 }, { "epoch": 0.3656367622120197, "grad_norm": 1496.134033203125, "learning_rate": 8.117055182982895e-06, "loss": 84.7695, "step": 44200 }, { "epoch": 0.36571948546138894, "grad_norm": 1437.298095703125, "learning_rate": 8.115952301033141e-06, "loss": 117.0865, "step": 44210 }, { "epoch": 0.36580220871075814, "grad_norm": 1193.9093017578125, "learning_rate": 8.11484917116136e-06, "loss": 104.2912, "step": 44220 }, { "epoch": 0.3658849319601274, "grad_norm": 912.3463745117188, "learning_rate": 8.113745793455328e-06, "loss": 105.9879, "step": 44230 }, { "epoch": 0.36596765520949665, "grad_norm": 1175.63134765625, "learning_rate": 8.112642168002831e-06, "loss": 95.1146, "step": 44240 }, { "epoch": 0.36605037845886584, "grad_norm": 957.2208862304688, "learning_rate": 8.111538294891684e-06, "loss": 133.3301, "step": 44250 }, { "epoch": 0.3661331017082351, "grad_norm": 780.9307861328125, "learning_rate": 8.110434174209714e-06, "loss": 112.0869, "step": 44260 }, { "epoch": 0.36621582495760435, "grad_norm": 1046.54052734375, "learning_rate": 8.109329806044772e-06, "loss": 122.4268, "step": 44270 }, { "epoch": 0.36629854820697355, "grad_norm": 593.833984375, "learning_rate": 8.108225190484728e-06, "loss": 134.9322, "step": 44280 }, { "epoch": 0.3663812714563428, "grad_norm": 1084.296630859375, "learning_rate": 8.107120327617469e-06, "loss": 108.1544, "step": 44290 }, { "epoch": 0.36646399470571206, "grad_norm": 795.716552734375, "learning_rate": 8.106015217530906e-06, "loss": 104.3661, "step": 44300 }, { "epoch": 0.36654671795508126, "grad_norm": 969.0634765625, "learning_rate": 8.104909860312968e-06, "loss": 118.4515, "step": 44310 }, { "epoch": 0.3666294412044505, "grad_norm": 1363.8905029296875, "learning_rate": 8.1038042560516e-06, "loss": 109.1337, "step": 44320 }, { "epoch": 0.36671216445381977, "grad_norm": 715.2791748046875, "learning_rate": 8.102698404834773e-06, "loss": 79.2838, "step": 44330 }, { "epoch": 0.36679488770318897, "grad_norm": 1469.4854736328125, "learning_rate": 8.101592306750472e-06, "loss": 110.5569, "step": 44340 }, { "epoch": 0.3668776109525582, "grad_norm": 992.7304077148438, "learning_rate": 8.100485961886707e-06, "loss": 97.823, "step": 44350 }, { "epoch": 0.3669603342019275, "grad_norm": 1063.874755859375, "learning_rate": 8.099379370331502e-06, "loss": 112.1215, "step": 44360 }, { "epoch": 0.3670430574512967, "grad_norm": 726.4131469726562, "learning_rate": 8.098272532172906e-06, "loss": 135.1273, "step": 44370 }, { "epoch": 0.3671257807006659, "grad_norm": 1017.98193359375, "learning_rate": 8.097165447498985e-06, "loss": 102.8711, "step": 44380 }, { "epoch": 0.3672085039500352, "grad_norm": 1006.7951049804688, "learning_rate": 8.09605811639782e-06, "loss": 120.6296, "step": 44390 }, { "epoch": 0.3672912271994044, "grad_norm": 1553.7777099609375, "learning_rate": 8.094950538957523e-06, "loss": 116.9153, "step": 44400 }, { "epoch": 0.36737395044877363, "grad_norm": 578.3179931640625, "learning_rate": 8.093842715266214e-06, "loss": 86.2257, "step": 44410 }, { "epoch": 0.3674566736981429, "grad_norm": 1568.3966064453125, "learning_rate": 8.092734645412037e-06, "loss": 103.4828, "step": 44420 }, { "epoch": 0.3675393969475121, "grad_norm": 1186.5718994140625, "learning_rate": 8.09162632948316e-06, "loss": 117.7764, "step": 44430 }, { "epoch": 0.36762212019688134, "grad_norm": 1135.4630126953125, "learning_rate": 8.090517767567765e-06, "loss": 95.9603, "step": 44440 }, { "epoch": 0.3677048434462506, "grad_norm": 959.4806518554688, "learning_rate": 8.089408959754055e-06, "loss": 99.0822, "step": 44450 }, { "epoch": 0.3677875666956198, "grad_norm": 758.1735229492188, "learning_rate": 8.088299906130252e-06, "loss": 149.9401, "step": 44460 }, { "epoch": 0.36787028994498905, "grad_norm": 791.1016845703125, "learning_rate": 8.087190606784598e-06, "loss": 79.0925, "step": 44470 }, { "epoch": 0.36795301319435825, "grad_norm": 640.9752807617188, "learning_rate": 8.086081061805357e-06, "loss": 95.8233, "step": 44480 }, { "epoch": 0.3680357364437275, "grad_norm": 1189.6053466796875, "learning_rate": 8.084971271280808e-06, "loss": 122.035, "step": 44490 }, { "epoch": 0.36811845969309676, "grad_norm": 1168.3837890625, "learning_rate": 8.083861235299253e-06, "loss": 100.2019, "step": 44500 }, { "epoch": 0.36820118294246595, "grad_norm": 891.6553955078125, "learning_rate": 8.082750953949015e-06, "loss": 115.5503, "step": 44510 }, { "epoch": 0.3682839061918352, "grad_norm": 884.3746337890625, "learning_rate": 8.081640427318429e-06, "loss": 94.7234, "step": 44520 }, { "epoch": 0.36836662944120446, "grad_norm": 760.7252807617188, "learning_rate": 8.080529655495856e-06, "loss": 126.0468, "step": 44530 }, { "epoch": 0.36844935269057366, "grad_norm": 831.2738647460938, "learning_rate": 8.079418638569679e-06, "loss": 92.954, "step": 44540 }, { "epoch": 0.3685320759399429, "grad_norm": 460.5909118652344, "learning_rate": 8.078307376628292e-06, "loss": 83.0053, "step": 44550 }, { "epoch": 0.36861479918931217, "grad_norm": 795.8884887695312, "learning_rate": 8.077195869760114e-06, "loss": 86.7317, "step": 44560 }, { "epoch": 0.36869752243868137, "grad_norm": 683.8246459960938, "learning_rate": 8.076084118053584e-06, "loss": 90.0042, "step": 44570 }, { "epoch": 0.3687802456880506, "grad_norm": 1078.8155517578125, "learning_rate": 8.074972121597158e-06, "loss": 140.497, "step": 44580 }, { "epoch": 0.3688629689374199, "grad_norm": 447.01214599609375, "learning_rate": 8.073859880479314e-06, "loss": 76.7604, "step": 44590 }, { "epoch": 0.3689456921867891, "grad_norm": 948.76416015625, "learning_rate": 8.072747394788545e-06, "loss": 83.8023, "step": 44600 }, { "epoch": 0.36902841543615833, "grad_norm": 1048.3646240234375, "learning_rate": 8.071634664613367e-06, "loss": 102.7217, "step": 44610 }, { "epoch": 0.3691111386855276, "grad_norm": 859.9012451171875, "learning_rate": 8.070521690042317e-06, "loss": 103.1552, "step": 44620 }, { "epoch": 0.3691938619348968, "grad_norm": 1066.72314453125, "learning_rate": 8.069408471163947e-06, "loss": 121.4919, "step": 44630 }, { "epoch": 0.36927658518426604, "grad_norm": 969.5633544921875, "learning_rate": 8.068295008066832e-06, "loss": 128.1989, "step": 44640 }, { "epoch": 0.3693593084336353, "grad_norm": 1198.6636962890625, "learning_rate": 8.067181300839565e-06, "loss": 161.2369, "step": 44650 }, { "epoch": 0.3694420316830045, "grad_norm": 700.932861328125, "learning_rate": 8.066067349570757e-06, "loss": 86.4731, "step": 44660 }, { "epoch": 0.36952475493237374, "grad_norm": 1984.68017578125, "learning_rate": 8.064953154349042e-06, "loss": 85.2434, "step": 44670 }, { "epoch": 0.369607478181743, "grad_norm": 951.8640747070312, "learning_rate": 8.063838715263072e-06, "loss": 92.2627, "step": 44680 }, { "epoch": 0.3696902014311122, "grad_norm": 991.0081176757812, "learning_rate": 8.062724032401515e-06, "loss": 82.2411, "step": 44690 }, { "epoch": 0.36977292468048145, "grad_norm": 915.6588134765625, "learning_rate": 8.061609105853062e-06, "loss": 123.3313, "step": 44700 }, { "epoch": 0.3698556479298507, "grad_norm": 648.7603759765625, "learning_rate": 8.060493935706425e-06, "loss": 86.3172, "step": 44710 }, { "epoch": 0.3699383711792199, "grad_norm": 731.9179077148438, "learning_rate": 8.059378522050332e-06, "loss": 142.6297, "step": 44720 }, { "epoch": 0.37002109442858916, "grad_norm": 738.2081909179688, "learning_rate": 8.05826286497353e-06, "loss": 100.2493, "step": 44730 }, { "epoch": 0.3701038176779584, "grad_norm": 961.7689208984375, "learning_rate": 8.057146964564786e-06, "loss": 108.2104, "step": 44740 }, { "epoch": 0.3701865409273276, "grad_norm": 1143.8248291015625, "learning_rate": 8.05603082091289e-06, "loss": 102.1352, "step": 44750 }, { "epoch": 0.37026926417669687, "grad_norm": 968.6405639648438, "learning_rate": 8.054914434106647e-06, "loss": 89.4334, "step": 44760 }, { "epoch": 0.3703519874260661, "grad_norm": 800.0516967773438, "learning_rate": 8.053797804234882e-06, "loss": 100.4505, "step": 44770 }, { "epoch": 0.3704347106754353, "grad_norm": 928.8748168945312, "learning_rate": 8.052680931386441e-06, "loss": 88.3113, "step": 44780 }, { "epoch": 0.3705174339248046, "grad_norm": 680.3980102539062, "learning_rate": 8.051563815650187e-06, "loss": 93.7541, "step": 44790 }, { "epoch": 0.3706001571741738, "grad_norm": 988.8343505859375, "learning_rate": 8.050446457115005e-06, "loss": 138.652, "step": 44800 }, { "epoch": 0.370682880423543, "grad_norm": 5196.66015625, "learning_rate": 8.0493288558698e-06, "loss": 161.6813, "step": 44810 }, { "epoch": 0.3707656036729123, "grad_norm": 1195.2532958984375, "learning_rate": 8.04821101200349e-06, "loss": 104.2872, "step": 44820 }, { "epoch": 0.37084832692228153, "grad_norm": 916.4719848632812, "learning_rate": 8.047092925605022e-06, "loss": 72.5952, "step": 44830 }, { "epoch": 0.37093105017165073, "grad_norm": 1935.22119140625, "learning_rate": 8.045974596763352e-06, "loss": 124.3693, "step": 44840 }, { "epoch": 0.37101377342102, "grad_norm": 660.0169677734375, "learning_rate": 8.044856025567464e-06, "loss": 118.5706, "step": 44850 }, { "epoch": 0.3710964966703892, "grad_norm": 1358.344482421875, "learning_rate": 8.043737212106356e-06, "loss": 116.8551, "step": 44860 }, { "epoch": 0.37117921991975844, "grad_norm": 963.0214233398438, "learning_rate": 8.042618156469045e-06, "loss": 88.5938, "step": 44870 }, { "epoch": 0.3712619431691277, "grad_norm": 718.8231811523438, "learning_rate": 8.041498858744572e-06, "loss": 93.5438, "step": 44880 }, { "epoch": 0.3713446664184969, "grad_norm": 884.6295776367188, "learning_rate": 8.040379319021994e-06, "loss": 98.0878, "step": 44890 }, { "epoch": 0.37142738966786615, "grad_norm": 818.326171875, "learning_rate": 8.039259537390388e-06, "loss": 106.2791, "step": 44900 }, { "epoch": 0.3715101129172354, "grad_norm": 722.222900390625, "learning_rate": 8.038139513938847e-06, "loss": 101.4262, "step": 44910 }, { "epoch": 0.3715928361666046, "grad_norm": 706.7052001953125, "learning_rate": 8.037019248756488e-06, "loss": 103.0186, "step": 44920 }, { "epoch": 0.37167555941597386, "grad_norm": 783.2863159179688, "learning_rate": 8.035898741932447e-06, "loss": 101.9469, "step": 44930 }, { "epoch": 0.3717582826653431, "grad_norm": 1484.9461669921875, "learning_rate": 8.034777993555875e-06, "loss": 140.325, "step": 44940 }, { "epoch": 0.3718410059147123, "grad_norm": 669.56640625, "learning_rate": 8.033657003715945e-06, "loss": 116.9738, "step": 44950 }, { "epoch": 0.37192372916408156, "grad_norm": 705.1654663085938, "learning_rate": 8.032535772501851e-06, "loss": 102.4115, "step": 44960 }, { "epoch": 0.3720064524134508, "grad_norm": 1263.1771240234375, "learning_rate": 8.031414300002802e-06, "loss": 77.7587, "step": 44970 }, { "epoch": 0.37208917566282, "grad_norm": 1212.223876953125, "learning_rate": 8.03029258630803e-06, "loss": 112.9885, "step": 44980 }, { "epoch": 0.37217189891218927, "grad_norm": 951.40185546875, "learning_rate": 8.029170631506785e-06, "loss": 95.1384, "step": 44990 }, { "epoch": 0.3722546221615585, "grad_norm": 1134.679443359375, "learning_rate": 8.028048435688333e-06, "loss": 106.5822, "step": 45000 }, { "epoch": 0.3723373454109277, "grad_norm": 932.267822265625, "learning_rate": 8.026925998941965e-06, "loss": 110.1278, "step": 45010 }, { "epoch": 0.372420068660297, "grad_norm": 457.553955078125, "learning_rate": 8.025803321356989e-06, "loss": 95.6591, "step": 45020 }, { "epoch": 0.37250279190966623, "grad_norm": 655.8544921875, "learning_rate": 8.024680403022726e-06, "loss": 89.4721, "step": 45030 }, { "epoch": 0.37258551515903543, "grad_norm": 893.6047973632812, "learning_rate": 8.023557244028526e-06, "loss": 101.3761, "step": 45040 }, { "epoch": 0.3726682384084047, "grad_norm": 765.0892333984375, "learning_rate": 8.022433844463752e-06, "loss": 85.2344, "step": 45050 }, { "epoch": 0.37275096165777394, "grad_norm": 955.836181640625, "learning_rate": 8.02131020441779e-06, "loss": 105.1853, "step": 45060 }, { "epoch": 0.37283368490714314, "grad_norm": 1159.584228515625, "learning_rate": 8.02018632398004e-06, "loss": 82.6549, "step": 45070 }, { "epoch": 0.3729164081565124, "grad_norm": 827.6931762695312, "learning_rate": 8.019062203239923e-06, "loss": 145.6601, "step": 45080 }, { "epoch": 0.37299913140588165, "grad_norm": 1674.2239990234375, "learning_rate": 8.017937842286882e-06, "loss": 104.1544, "step": 45090 }, { "epoch": 0.37308185465525084, "grad_norm": 1010.4392700195312, "learning_rate": 8.01681324121038e-06, "loss": 102.7155, "step": 45100 }, { "epoch": 0.3731645779046201, "grad_norm": 812.0460205078125, "learning_rate": 8.015688400099893e-06, "loss": 80.9767, "step": 45110 }, { "epoch": 0.37324730115398935, "grad_norm": 676.9262084960938, "learning_rate": 8.014563319044919e-06, "loss": 107.2131, "step": 45120 }, { "epoch": 0.37333002440335855, "grad_norm": 1148.935302734375, "learning_rate": 8.013437998134978e-06, "loss": 96.8717, "step": 45130 }, { "epoch": 0.3734127476527278, "grad_norm": 2210.831298828125, "learning_rate": 8.012312437459604e-06, "loss": 115.9179, "step": 45140 }, { "epoch": 0.37349547090209706, "grad_norm": 740.3560180664062, "learning_rate": 8.011186637108354e-06, "loss": 95.7675, "step": 45150 }, { "epoch": 0.37357819415146626, "grad_norm": 1315.1302490234375, "learning_rate": 8.010060597170805e-06, "loss": 108.1328, "step": 45160 }, { "epoch": 0.3736609174008355, "grad_norm": 1144.85400390625, "learning_rate": 8.008934317736546e-06, "loss": 104.8319, "step": 45170 }, { "epoch": 0.37374364065020477, "grad_norm": 642.925048828125, "learning_rate": 8.007807798895195e-06, "loss": 114.467, "step": 45180 }, { "epoch": 0.37382636389957397, "grad_norm": 1433.939208984375, "learning_rate": 8.00668104073638e-06, "loss": 142.9769, "step": 45190 }, { "epoch": 0.3739090871489432, "grad_norm": 615.9909057617188, "learning_rate": 8.005554043349753e-06, "loss": 77.9182, "step": 45200 }, { "epoch": 0.3739918103983124, "grad_norm": 1355.69287109375, "learning_rate": 8.004426806824985e-06, "loss": 98.0649, "step": 45210 }, { "epoch": 0.3740745336476817, "grad_norm": 716.5189208984375, "learning_rate": 8.003299331251764e-06, "loss": 113.2009, "step": 45220 }, { "epoch": 0.3741572568970509, "grad_norm": 860.948974609375, "learning_rate": 8.002171616719798e-06, "loss": 89.8221, "step": 45230 }, { "epoch": 0.3742399801464201, "grad_norm": 1081.593994140625, "learning_rate": 8.001043663318815e-06, "loss": 148.4487, "step": 45240 }, { "epoch": 0.3743227033957894, "grad_norm": 714.3857421875, "learning_rate": 7.999915471138562e-06, "loss": 111.338, "step": 45250 }, { "epoch": 0.37440542664515863, "grad_norm": 863.04052734375, "learning_rate": 7.9987870402688e-06, "loss": 123.2091, "step": 45260 }, { "epoch": 0.37448814989452783, "grad_norm": 541.889404296875, "learning_rate": 7.997658370799318e-06, "loss": 122.7542, "step": 45270 }, { "epoch": 0.3745708731438971, "grad_norm": 948.584716796875, "learning_rate": 7.996529462819915e-06, "loss": 119.426, "step": 45280 }, { "epoch": 0.37465359639326634, "grad_norm": 736.9681396484375, "learning_rate": 7.995400316420416e-06, "loss": 64.0782, "step": 45290 }, { "epoch": 0.37473631964263554, "grad_norm": 1551.1883544921875, "learning_rate": 7.994270931690662e-06, "loss": 136.7847, "step": 45300 }, { "epoch": 0.3748190428920048, "grad_norm": 804.4282836914062, "learning_rate": 7.993141308720511e-06, "loss": 136.6521, "step": 45310 }, { "epoch": 0.37490176614137405, "grad_norm": 778.236083984375, "learning_rate": 7.99201144759984e-06, "loss": 95.8208, "step": 45320 }, { "epoch": 0.37498448939074325, "grad_norm": 2299.00048828125, "learning_rate": 7.990881348418554e-06, "loss": 124.4509, "step": 45330 }, { "epoch": 0.3750672126401125, "grad_norm": 973.8239135742188, "learning_rate": 7.989751011266565e-06, "loss": 112.5193, "step": 45340 }, { "epoch": 0.37514993588948176, "grad_norm": 745.3870239257812, "learning_rate": 7.988620436233806e-06, "loss": 112.1299, "step": 45350 }, { "epoch": 0.37523265913885095, "grad_norm": 2419.01953125, "learning_rate": 7.987489623410236e-06, "loss": 130.317, "step": 45360 }, { "epoch": 0.3753153823882202, "grad_norm": 2003.9739990234375, "learning_rate": 7.986358572885828e-06, "loss": 118.5811, "step": 45370 }, { "epoch": 0.37539810563758946, "grad_norm": 1251.0047607421875, "learning_rate": 7.985227284750574e-06, "loss": 106.9141, "step": 45380 }, { "epoch": 0.37548082888695866, "grad_norm": 879.7947998046875, "learning_rate": 7.984095759094485e-06, "loss": 93.4843, "step": 45390 }, { "epoch": 0.3755635521363279, "grad_norm": 852.9107055664062, "learning_rate": 7.982963996007591e-06, "loss": 141.075, "step": 45400 }, { "epoch": 0.37564627538569717, "grad_norm": 952.68798828125, "learning_rate": 7.981831995579943e-06, "loss": 91.4658, "step": 45410 }, { "epoch": 0.37572899863506637, "grad_norm": 1191.1885986328125, "learning_rate": 7.980699757901607e-06, "loss": 80.4354, "step": 45420 }, { "epoch": 0.3758117218844356, "grad_norm": 813.9268188476562, "learning_rate": 7.97956728306267e-06, "loss": 90.7331, "step": 45430 }, { "epoch": 0.3758944451338049, "grad_norm": 697.1983032226562, "learning_rate": 7.97843457115324e-06, "loss": 98.3303, "step": 45440 }, { "epoch": 0.3759771683831741, "grad_norm": 628.31982421875, "learning_rate": 7.97730162226344e-06, "loss": 103.7881, "step": 45450 }, { "epoch": 0.37605989163254333, "grad_norm": 2688.026123046875, "learning_rate": 7.976168436483415e-06, "loss": 97.9229, "step": 45460 }, { "epoch": 0.3761426148819126, "grad_norm": 813.2411499023438, "learning_rate": 7.975035013903326e-06, "loss": 94.9314, "step": 45470 }, { "epoch": 0.3762253381312818, "grad_norm": 712.8557739257812, "learning_rate": 7.973901354613353e-06, "loss": 85.9302, "step": 45480 }, { "epoch": 0.37630806138065104, "grad_norm": 1530.84033203125, "learning_rate": 7.972767458703697e-06, "loss": 128.0701, "step": 45490 }, { "epoch": 0.3763907846300203, "grad_norm": 1192.7733154296875, "learning_rate": 7.971633326264581e-06, "loss": 99.9536, "step": 45500 }, { "epoch": 0.3764735078793895, "grad_norm": 854.33203125, "learning_rate": 7.970498957386237e-06, "loss": 121.3529, "step": 45510 }, { "epoch": 0.37655623112875874, "grad_norm": 673.5110473632812, "learning_rate": 7.969364352158922e-06, "loss": 102.1387, "step": 45520 }, { "epoch": 0.376638954378128, "grad_norm": 868.6480712890625, "learning_rate": 7.968229510672915e-06, "loss": 97.5747, "step": 45530 }, { "epoch": 0.3767216776274972, "grad_norm": 1399.73779296875, "learning_rate": 7.967094433018508e-06, "loss": 125.0937, "step": 45540 }, { "epoch": 0.37680440087686645, "grad_norm": 1239.8349609375, "learning_rate": 7.965959119286013e-06, "loss": 113.5951, "step": 45550 }, { "epoch": 0.37688712412623565, "grad_norm": 1359.953125, "learning_rate": 7.964823569565765e-06, "loss": 103.3041, "step": 45560 }, { "epoch": 0.3769698473756049, "grad_norm": 720.0075073242188, "learning_rate": 7.963687783948111e-06, "loss": 81.487, "step": 45570 }, { "epoch": 0.37705257062497416, "grad_norm": 1454.417724609375, "learning_rate": 7.96255176252342e-06, "loss": 83.4532, "step": 45580 }, { "epoch": 0.37713529387434336, "grad_norm": 1103.7496337890625, "learning_rate": 7.961415505382083e-06, "loss": 125.8114, "step": 45590 }, { "epoch": 0.3772180171237126, "grad_norm": 948.4056396484375, "learning_rate": 7.960279012614508e-06, "loss": 90.2628, "step": 45600 }, { "epoch": 0.37730074037308187, "grad_norm": 1128.2615966796875, "learning_rate": 7.959142284311115e-06, "loss": 113.3847, "step": 45610 }, { "epoch": 0.37738346362245107, "grad_norm": 921.497802734375, "learning_rate": 7.958005320562349e-06, "loss": 125.8096, "step": 45620 }, { "epoch": 0.3774661868718203, "grad_norm": 1018.8650512695312, "learning_rate": 7.95686812145868e-06, "loss": 122.7865, "step": 45630 }, { "epoch": 0.3775489101211896, "grad_norm": 979.5180053710938, "learning_rate": 7.955730687090582e-06, "loss": 125.0732, "step": 45640 }, { "epoch": 0.3776316333705588, "grad_norm": 978.5775756835938, "learning_rate": 7.954593017548557e-06, "loss": 87.2099, "step": 45650 }, { "epoch": 0.377714356619928, "grad_norm": 978.8362426757812, "learning_rate": 7.953455112923127e-06, "loss": 134.838, "step": 45660 }, { "epoch": 0.3777970798692973, "grad_norm": 961.3007202148438, "learning_rate": 7.952316973304828e-06, "loss": 109.7581, "step": 45670 }, { "epoch": 0.3778798031186665, "grad_norm": 872.6028442382812, "learning_rate": 7.951178598784217e-06, "loss": 94.058, "step": 45680 }, { "epoch": 0.37796252636803573, "grad_norm": 603.7721557617188, "learning_rate": 7.950039989451868e-06, "loss": 142.0334, "step": 45690 }, { "epoch": 0.378045249617405, "grad_norm": 769.1307373046875, "learning_rate": 7.948901145398376e-06, "loss": 101.5725, "step": 45700 }, { "epoch": 0.3781279728667742, "grad_norm": 523.3740844726562, "learning_rate": 7.947762066714353e-06, "loss": 99.7676, "step": 45710 }, { "epoch": 0.37821069611614344, "grad_norm": 569.120361328125, "learning_rate": 7.946622753490433e-06, "loss": 73.9602, "step": 45720 }, { "epoch": 0.3782934193655127, "grad_norm": 850.3909912109375, "learning_rate": 7.945483205817262e-06, "loss": 99.4327, "step": 45730 }, { "epoch": 0.3783761426148819, "grad_norm": 1302.0482177734375, "learning_rate": 7.94434342378551e-06, "loss": 132.1461, "step": 45740 }, { "epoch": 0.37845886586425115, "grad_norm": 698.3201293945312, "learning_rate": 7.943203407485864e-06, "loss": 69.9039, "step": 45750 }, { "epoch": 0.3785415891136204, "grad_norm": 911.8252563476562, "learning_rate": 7.942063157009033e-06, "loss": 102.4791, "step": 45760 }, { "epoch": 0.3786243123629896, "grad_norm": 991.9946899414062, "learning_rate": 7.940922672445737e-06, "loss": 113.1581, "step": 45770 }, { "epoch": 0.37870703561235886, "grad_norm": 1038.773681640625, "learning_rate": 7.939781953886722e-06, "loss": 128.2211, "step": 45780 }, { "epoch": 0.3787897588617281, "grad_norm": 1063.1624755859375, "learning_rate": 7.938641001422747e-06, "loss": 106.1225, "step": 45790 }, { "epoch": 0.3788724821110973, "grad_norm": 897.7556762695312, "learning_rate": 7.937499815144597e-06, "loss": 102.7701, "step": 45800 }, { "epoch": 0.37895520536046656, "grad_norm": 454.84552001953125, "learning_rate": 7.936358395143065e-06, "loss": 133.0522, "step": 45810 }, { "epoch": 0.3790379286098358, "grad_norm": 491.5146484375, "learning_rate": 7.935216741508971e-06, "loss": 131.0702, "step": 45820 }, { "epoch": 0.379120651859205, "grad_norm": 816.953125, "learning_rate": 7.934074854333153e-06, "loss": 102.6913, "step": 45830 }, { "epoch": 0.37920337510857427, "grad_norm": 991.1273193359375, "learning_rate": 7.932932733706467e-06, "loss": 96.2913, "step": 45840 }, { "epoch": 0.3792860983579435, "grad_norm": 2715.989501953125, "learning_rate": 7.931790379719781e-06, "loss": 145.7241, "step": 45850 }, { "epoch": 0.3793688216073127, "grad_norm": 733.826416015625, "learning_rate": 7.93064779246399e-06, "loss": 79.5076, "step": 45860 }, { "epoch": 0.379451544856682, "grad_norm": 1925.5067138671875, "learning_rate": 7.929504972030003e-06, "loss": 129.3025, "step": 45870 }, { "epoch": 0.37953426810605123, "grad_norm": 723.109130859375, "learning_rate": 7.928361918508752e-06, "loss": 124.5502, "step": 45880 }, { "epoch": 0.37961699135542043, "grad_norm": 942.7910766601562, "learning_rate": 7.927218631991182e-06, "loss": 87.0695, "step": 45890 }, { "epoch": 0.3796997146047897, "grad_norm": 1327.8336181640625, "learning_rate": 7.92607511256826e-06, "loss": 90.3665, "step": 45900 }, { "epoch": 0.37978243785415894, "grad_norm": 1204.0654296875, "learning_rate": 7.924931360330968e-06, "loss": 120.6505, "step": 45910 }, { "epoch": 0.37986516110352814, "grad_norm": 643.5101928710938, "learning_rate": 7.92378737537031e-06, "loss": 84.0929, "step": 45920 }, { "epoch": 0.3799478843528974, "grad_norm": 1710.7091064453125, "learning_rate": 7.922643157777314e-06, "loss": 94.1093, "step": 45930 }, { "epoch": 0.3800306076022666, "grad_norm": 1306.8468017578125, "learning_rate": 7.921498707643011e-06, "loss": 110.1609, "step": 45940 }, { "epoch": 0.38011333085163584, "grad_norm": 1266.775146484375, "learning_rate": 7.920354025058467e-06, "loss": 94.9035, "step": 45950 }, { "epoch": 0.3801960541010051, "grad_norm": 1168.3138427734375, "learning_rate": 7.919209110114752e-06, "loss": 165.2224, "step": 45960 }, { "epoch": 0.3802787773503743, "grad_norm": 760.196044921875, "learning_rate": 7.918063962902968e-06, "loss": 118.3697, "step": 45970 }, { "epoch": 0.38036150059974355, "grad_norm": 1145.4422607421875, "learning_rate": 7.916918583514227e-06, "loss": 102.1873, "step": 45980 }, { "epoch": 0.3804442238491128, "grad_norm": 523.3143310546875, "learning_rate": 7.91577297203966e-06, "loss": 94.5064, "step": 45990 }, { "epoch": 0.380526947098482, "grad_norm": 777.9324951171875, "learning_rate": 7.91462712857042e-06, "loss": 99.4327, "step": 46000 }, { "epoch": 0.38060967034785126, "grad_norm": 1038.9837646484375, "learning_rate": 7.913481053197673e-06, "loss": 120.6933, "step": 46010 }, { "epoch": 0.3806923935972205, "grad_norm": 702.442138671875, "learning_rate": 7.912334746012613e-06, "loss": 141.1689, "step": 46020 }, { "epoch": 0.3807751168465897, "grad_norm": 910.6521606445312, "learning_rate": 7.911188207106442e-06, "loss": 84.091, "step": 46030 }, { "epoch": 0.38085784009595897, "grad_norm": 836.0477905273438, "learning_rate": 7.910041436570386e-06, "loss": 92.7803, "step": 46040 }, { "epoch": 0.3809405633453282, "grad_norm": 796.7591552734375, "learning_rate": 7.90889443449569e-06, "loss": 93.9601, "step": 46050 }, { "epoch": 0.3810232865946974, "grad_norm": 1023.9012451171875, "learning_rate": 7.90774720097361e-06, "loss": 125.3338, "step": 46060 }, { "epoch": 0.3811060098440667, "grad_norm": 1169.42822265625, "learning_rate": 7.906599736095433e-06, "loss": 89.7407, "step": 46070 }, { "epoch": 0.3811887330934359, "grad_norm": 924.0548706054688, "learning_rate": 7.905452039952453e-06, "loss": 86.3304, "step": 46080 }, { "epoch": 0.3812714563428051, "grad_norm": 595.2965087890625, "learning_rate": 7.904304112635987e-06, "loss": 106.9174, "step": 46090 }, { "epoch": 0.3813541795921744, "grad_norm": 472.991943359375, "learning_rate": 7.903155954237375e-06, "loss": 120.8275, "step": 46100 }, { "epoch": 0.38143690284154363, "grad_norm": 1009.1423950195312, "learning_rate": 7.902007564847967e-06, "loss": 105.188, "step": 46110 }, { "epoch": 0.38151962609091283, "grad_norm": 851.7573852539062, "learning_rate": 7.900858944559133e-06, "loss": 111.3603, "step": 46120 }, { "epoch": 0.3816023493402821, "grad_norm": 981.0646362304688, "learning_rate": 7.899710093462267e-06, "loss": 86.6864, "step": 46130 }, { "epoch": 0.38168507258965134, "grad_norm": 864.4039306640625, "learning_rate": 7.898561011648777e-06, "loss": 155.4255, "step": 46140 }, { "epoch": 0.38176779583902054, "grad_norm": 1161.7957763671875, "learning_rate": 7.89741169921009e-06, "loss": 92.5417, "step": 46150 }, { "epoch": 0.3818505190883898, "grad_norm": 1559.1328125, "learning_rate": 7.896262156237652e-06, "loss": 109.0317, "step": 46160 }, { "epoch": 0.38193324233775905, "grad_norm": 839.4773559570312, "learning_rate": 7.895112382822925e-06, "loss": 124.5884, "step": 46170 }, { "epoch": 0.38201596558712825, "grad_norm": 1135.74658203125, "learning_rate": 7.893962379057393e-06, "loss": 115.562, "step": 46180 }, { "epoch": 0.3820986888364975, "grad_norm": 979.9491577148438, "learning_rate": 7.892812145032557e-06, "loss": 118.3164, "step": 46190 }, { "epoch": 0.38218141208586676, "grad_norm": 4294.3427734375, "learning_rate": 7.891661680839932e-06, "loss": 125.0205, "step": 46200 }, { "epoch": 0.38226413533523596, "grad_norm": 933.6452026367188, "learning_rate": 7.89051098657106e-06, "loss": 121.8078, "step": 46210 }, { "epoch": 0.3823468585846052, "grad_norm": 527.7791137695312, "learning_rate": 7.889360062317495e-06, "loss": 96.7531, "step": 46220 }, { "epoch": 0.38242958183397446, "grad_norm": 777.3706665039062, "learning_rate": 7.888208908170812e-06, "loss": 134.1928, "step": 46230 }, { "epoch": 0.38251230508334366, "grad_norm": 805.9109497070312, "learning_rate": 7.887057524222596e-06, "loss": 101.3832, "step": 46240 }, { "epoch": 0.3825950283327129, "grad_norm": 1473.904296875, "learning_rate": 7.885905910564466e-06, "loss": 87.6336, "step": 46250 }, { "epoch": 0.38267775158208217, "grad_norm": 784.258056640625, "learning_rate": 7.884754067288047e-06, "loss": 94.394, "step": 46260 }, { "epoch": 0.38276047483145137, "grad_norm": 1091.794677734375, "learning_rate": 7.883601994484986e-06, "loss": 106.1401, "step": 46270 }, { "epoch": 0.3828431980808206, "grad_norm": 1147.5867919921875, "learning_rate": 7.882449692246948e-06, "loss": 104.8446, "step": 46280 }, { "epoch": 0.3829259213301898, "grad_norm": 1361.256591796875, "learning_rate": 7.881297160665616e-06, "loss": 97.2336, "step": 46290 }, { "epoch": 0.3830086445795591, "grad_norm": 1118.20556640625, "learning_rate": 7.880144399832693e-06, "loss": 109.2898, "step": 46300 }, { "epoch": 0.38309136782892833, "grad_norm": 1180.1666259765625, "learning_rate": 7.878991409839897e-06, "loss": 107.2897, "step": 46310 }, { "epoch": 0.38317409107829753, "grad_norm": 976.9520874023438, "learning_rate": 7.87783819077897e-06, "loss": 127.2354, "step": 46320 }, { "epoch": 0.3832568143276668, "grad_norm": 569.0108032226562, "learning_rate": 7.876684742741665e-06, "loss": 89.219, "step": 46330 }, { "epoch": 0.38333953757703604, "grad_norm": 1490.4720458984375, "learning_rate": 7.875531065819755e-06, "loss": 88.2369, "step": 46340 }, { "epoch": 0.38342226082640524, "grad_norm": 1101.564697265625, "learning_rate": 7.874377160105037e-06, "loss": 104.7651, "step": 46350 }, { "epoch": 0.3835049840757745, "grad_norm": 1799.0706787109375, "learning_rate": 7.873223025689319e-06, "loss": 138.4782, "step": 46360 }, { "epoch": 0.38358770732514375, "grad_norm": 1268.860595703125, "learning_rate": 7.872068662664432e-06, "loss": 91.4783, "step": 46370 }, { "epoch": 0.38367043057451294, "grad_norm": 1431.305419921875, "learning_rate": 7.870914071122222e-06, "loss": 100.8851, "step": 46380 }, { "epoch": 0.3837531538238822, "grad_norm": 1034.6007080078125, "learning_rate": 7.869759251154554e-06, "loss": 94.5458, "step": 46390 }, { "epoch": 0.38383587707325145, "grad_norm": 1329.562255859375, "learning_rate": 7.868604202853314e-06, "loss": 95.9945, "step": 46400 }, { "epoch": 0.38391860032262065, "grad_norm": 755.7239379882812, "learning_rate": 7.867448926310403e-06, "loss": 106.9036, "step": 46410 }, { "epoch": 0.3840013235719899, "grad_norm": 1034.0926513671875, "learning_rate": 7.866293421617741e-06, "loss": 94.1212, "step": 46420 }, { "epoch": 0.38408404682135916, "grad_norm": 1169.0289306640625, "learning_rate": 7.865137688867264e-06, "loss": 76.2746, "step": 46430 }, { "epoch": 0.38416677007072836, "grad_norm": 1374.6610107421875, "learning_rate": 7.86398172815093e-06, "loss": 141.711, "step": 46440 }, { "epoch": 0.3842494933200976, "grad_norm": 715.2680053710938, "learning_rate": 7.862825539560716e-06, "loss": 127.8841, "step": 46450 }, { "epoch": 0.38433221656946687, "grad_norm": 1387.7430419921875, "learning_rate": 7.861669123188613e-06, "loss": 124.0174, "step": 46460 }, { "epoch": 0.38441493981883607, "grad_norm": 826.4485473632812, "learning_rate": 7.86051247912663e-06, "loss": 110.8221, "step": 46470 }, { "epoch": 0.3844976630682053, "grad_norm": 807.095703125, "learning_rate": 7.859355607466797e-06, "loss": 87.1465, "step": 46480 }, { "epoch": 0.3845803863175746, "grad_norm": 1193.6893310546875, "learning_rate": 7.858198508301161e-06, "loss": 133.6182, "step": 46490 }, { "epoch": 0.3846631095669438, "grad_norm": 527.4124755859375, "learning_rate": 7.857041181721788e-06, "loss": 97.8456, "step": 46500 }, { "epoch": 0.384745832816313, "grad_norm": 862.8912963867188, "learning_rate": 7.855883627820757e-06, "loss": 134.22, "step": 46510 }, { "epoch": 0.3848285560656823, "grad_norm": 548.6507568359375, "learning_rate": 7.854725846690175e-06, "loss": 83.1777, "step": 46520 }, { "epoch": 0.3849112793150515, "grad_norm": 902.584228515625, "learning_rate": 7.85356783842216e-06, "loss": 106.9863, "step": 46530 }, { "epoch": 0.38499400256442073, "grad_norm": 535.5631103515625, "learning_rate": 7.852409603108845e-06, "loss": 113.9048, "step": 46540 }, { "epoch": 0.38507672581379, "grad_norm": 938.954345703125, "learning_rate": 7.85125114084239e-06, "loss": 105.1793, "step": 46550 }, { "epoch": 0.3851594490631592, "grad_norm": 1107.5294189453125, "learning_rate": 7.850092451714967e-06, "loss": 115.7462, "step": 46560 }, { "epoch": 0.38524217231252844, "grad_norm": 1338.5953369140625, "learning_rate": 7.84893353581877e-06, "loss": 93.1394, "step": 46570 }, { "epoch": 0.3853248955618977, "grad_norm": 1115.7308349609375, "learning_rate": 7.847774393246005e-06, "loss": 117.0743, "step": 46580 }, { "epoch": 0.3854076188112669, "grad_norm": 712.0968017578125, "learning_rate": 7.8466150240889e-06, "loss": 92.4161, "step": 46590 }, { "epoch": 0.38549034206063615, "grad_norm": 1130.7666015625, "learning_rate": 7.845455428439703e-06, "loss": 90.2969, "step": 46600 }, { "epoch": 0.3855730653100054, "grad_norm": 694.2098388671875, "learning_rate": 7.844295606390675e-06, "loss": 106.4844, "step": 46610 }, { "epoch": 0.3856557885593746, "grad_norm": 758.242431640625, "learning_rate": 7.843135558034101e-06, "loss": 85.5782, "step": 46620 }, { "epoch": 0.38573851180874386, "grad_norm": 847.0222778320312, "learning_rate": 7.841975283462278e-06, "loss": 83.5226, "step": 46630 }, { "epoch": 0.3858212350581131, "grad_norm": 828.2359619140625, "learning_rate": 7.840814782767525e-06, "loss": 74.5017, "step": 46640 }, { "epoch": 0.3859039583074823, "grad_norm": 575.1474609375, "learning_rate": 7.839654056042176e-06, "loss": 84.7638, "step": 46650 }, { "epoch": 0.38598668155685156, "grad_norm": 961.8069458007812, "learning_rate": 7.838493103378588e-06, "loss": 105.9289, "step": 46660 }, { "epoch": 0.38606940480622076, "grad_norm": 323.90765380859375, "learning_rate": 7.83733192486913e-06, "loss": 134.9893, "step": 46670 }, { "epoch": 0.38615212805559, "grad_norm": 906.62060546875, "learning_rate": 7.836170520606191e-06, "loss": 132.9467, "step": 46680 }, { "epoch": 0.38623485130495927, "grad_norm": 1547.6925048828125, "learning_rate": 7.83500889068218e-06, "loss": 145.7491, "step": 46690 }, { "epoch": 0.38631757455432847, "grad_norm": 1164.046142578125, "learning_rate": 7.833847035189524e-06, "loss": 110.9907, "step": 46700 }, { "epoch": 0.3864002978036977, "grad_norm": 1105.657470703125, "learning_rate": 7.832684954220664e-06, "loss": 124.7499, "step": 46710 }, { "epoch": 0.386483021053067, "grad_norm": 508.67803955078125, "learning_rate": 7.831522647868064e-06, "loss": 95.8649, "step": 46720 }, { "epoch": 0.3865657443024362, "grad_norm": 674.171875, "learning_rate": 7.8303601162242e-06, "loss": 99.7405, "step": 46730 }, { "epoch": 0.38664846755180543, "grad_norm": 1009.1093139648438, "learning_rate": 7.829197359381571e-06, "loss": 111.1325, "step": 46740 }, { "epoch": 0.3867311908011747, "grad_norm": 1165.2833251953125, "learning_rate": 7.828034377432694e-06, "loss": 111.5451, "step": 46750 }, { "epoch": 0.3868139140505439, "grad_norm": 542.6980590820312, "learning_rate": 7.826871170470099e-06, "loss": 93.1879, "step": 46760 }, { "epoch": 0.38689663729991314, "grad_norm": 1133.044921875, "learning_rate": 7.82570773858634e-06, "loss": 90.8988, "step": 46770 }, { "epoch": 0.3869793605492824, "grad_norm": 684.3965454101562, "learning_rate": 7.824544081873984e-06, "loss": 79.4068, "step": 46780 }, { "epoch": 0.3870620837986516, "grad_norm": 1281.01513671875, "learning_rate": 7.823380200425618e-06, "loss": 132.8948, "step": 46790 }, { "epoch": 0.38714480704802084, "grad_norm": 1106.5760498046875, "learning_rate": 7.822216094333847e-06, "loss": 109.8843, "step": 46800 }, { "epoch": 0.3872275302973901, "grad_norm": 736.9039306640625, "learning_rate": 7.821051763691293e-06, "loss": 95.4937, "step": 46810 }, { "epoch": 0.3873102535467593, "grad_norm": 983.0117797851562, "learning_rate": 7.819887208590597e-06, "loss": 106.8212, "step": 46820 }, { "epoch": 0.38739297679612855, "grad_norm": 900.6527099609375, "learning_rate": 7.818722429124418e-06, "loss": 97.8815, "step": 46830 }, { "epoch": 0.3874757000454978, "grad_norm": 850.8548583984375, "learning_rate": 7.817557425385433e-06, "loss": 81.9495, "step": 46840 }, { "epoch": 0.387558423294867, "grad_norm": 855.3139038085938, "learning_rate": 7.816392197466333e-06, "loss": 89.8393, "step": 46850 }, { "epoch": 0.38764114654423626, "grad_norm": 587.1494750976562, "learning_rate": 7.815226745459831e-06, "loss": 106.3622, "step": 46860 }, { "epoch": 0.3877238697936055, "grad_norm": 3245.526123046875, "learning_rate": 7.814061069458657e-06, "loss": 111.3302, "step": 46870 }, { "epoch": 0.3878065930429747, "grad_norm": 547.8377685546875, "learning_rate": 7.81289516955556e-06, "loss": 78.6426, "step": 46880 }, { "epoch": 0.38788931629234397, "grad_norm": 1064.7379150390625, "learning_rate": 7.811729045843303e-06, "loss": 90.7304, "step": 46890 }, { "epoch": 0.3879720395417132, "grad_norm": 616.6322631835938, "learning_rate": 7.81056269841467e-06, "loss": 107.5491, "step": 46900 }, { "epoch": 0.3880547627910824, "grad_norm": 1086.90185546875, "learning_rate": 7.80939612736246e-06, "loss": 112.952, "step": 46910 }, { "epoch": 0.3881374860404517, "grad_norm": 762.38916015625, "learning_rate": 7.808229332779496e-06, "loss": 93.649, "step": 46920 }, { "epoch": 0.38822020928982093, "grad_norm": 1226.2991943359375, "learning_rate": 7.807062314758612e-06, "loss": 122.8837, "step": 46930 }, { "epoch": 0.3883029325391901, "grad_norm": 980.0850219726562, "learning_rate": 7.80589507339266e-06, "loss": 118.3508, "step": 46940 }, { "epoch": 0.3883856557885594, "grad_norm": 899.0253295898438, "learning_rate": 7.804727608774516e-06, "loss": 95.3277, "step": 46950 }, { "epoch": 0.38846837903792864, "grad_norm": 992.0533447265625, "learning_rate": 7.803559920997067e-06, "loss": 100.9585, "step": 46960 }, { "epoch": 0.38855110228729783, "grad_norm": 1150.515869140625, "learning_rate": 7.802392010153223e-06, "loss": 106.7416, "step": 46970 }, { "epoch": 0.3886338255366671, "grad_norm": 392.8724670410156, "learning_rate": 7.801223876335907e-06, "loss": 81.8418, "step": 46980 }, { "epoch": 0.38871654878603634, "grad_norm": 958.8024291992188, "learning_rate": 7.800055519638064e-06, "loss": 104.2003, "step": 46990 }, { "epoch": 0.38879927203540554, "grad_norm": 1033.255859375, "learning_rate": 7.798886940152654e-06, "loss": 111.1013, "step": 47000 }, { "epoch": 0.3888819952847748, "grad_norm": 670.1970825195312, "learning_rate": 7.797718137972654e-06, "loss": 108.9194, "step": 47010 }, { "epoch": 0.388964718534144, "grad_norm": 927.21630859375, "learning_rate": 7.79654911319106e-06, "loss": 107.1055, "step": 47020 }, { "epoch": 0.38904744178351325, "grad_norm": 1264.6552734375, "learning_rate": 7.795379865900892e-06, "loss": 131.0568, "step": 47030 }, { "epoch": 0.3891301650328825, "grad_norm": 1989.842529296875, "learning_rate": 7.794210396195175e-06, "loss": 94.6299, "step": 47040 }, { "epoch": 0.3892128882822517, "grad_norm": 530.0291748046875, "learning_rate": 7.79304070416696e-06, "loss": 121.8401, "step": 47050 }, { "epoch": 0.38929561153162096, "grad_norm": 936.9892578125, "learning_rate": 7.791870789909315e-06, "loss": 89.362, "step": 47060 }, { "epoch": 0.3893783347809902, "grad_norm": 2093.591796875, "learning_rate": 7.790700653515324e-06, "loss": 90.1807, "step": 47070 }, { "epoch": 0.3894610580303594, "grad_norm": 549.9674682617188, "learning_rate": 7.789530295078089e-06, "loss": 100.1859, "step": 47080 }, { "epoch": 0.38954378127972866, "grad_norm": 882.615966796875, "learning_rate": 7.788359714690732e-06, "loss": 117.6557, "step": 47090 }, { "epoch": 0.3896265045290979, "grad_norm": 798.0199584960938, "learning_rate": 7.787188912446389e-06, "loss": 127.0244, "step": 47100 }, { "epoch": 0.3897092277784671, "grad_norm": 776.5193481445312, "learning_rate": 7.786017888438214e-06, "loss": 126.0215, "step": 47110 }, { "epoch": 0.38979195102783637, "grad_norm": 654.182373046875, "learning_rate": 7.784846642759383e-06, "loss": 82.0503, "step": 47120 }, { "epoch": 0.3898746742772056, "grad_norm": 822.113525390625, "learning_rate": 7.783675175503087e-06, "loss": 89.8243, "step": 47130 }, { "epoch": 0.3899573975265748, "grad_norm": 865.35546875, "learning_rate": 7.78250348676253e-06, "loss": 98.9063, "step": 47140 }, { "epoch": 0.3900401207759441, "grad_norm": 1154.8814697265625, "learning_rate": 7.781331576630941e-06, "loss": 146.3992, "step": 47150 }, { "epoch": 0.39012284402531333, "grad_norm": 1378.4947509765625, "learning_rate": 7.780159445201562e-06, "loss": 130.5315, "step": 47160 }, { "epoch": 0.39020556727468253, "grad_norm": 1242.33251953125, "learning_rate": 7.778987092567658e-06, "loss": 119.4341, "step": 47170 }, { "epoch": 0.3902882905240518, "grad_norm": 797.1754760742188, "learning_rate": 7.777814518822504e-06, "loss": 67.0835, "step": 47180 }, { "epoch": 0.39037101377342104, "grad_norm": 607.6759033203125, "learning_rate": 7.776641724059398e-06, "loss": 93.4001, "step": 47190 }, { "epoch": 0.39045373702279024, "grad_norm": 819.6945190429688, "learning_rate": 7.77546870837165e-06, "loss": 74.3229, "step": 47200 }, { "epoch": 0.3905364602721595, "grad_norm": 903.440185546875, "learning_rate": 7.774295471852596e-06, "loss": 108.8114, "step": 47210 }, { "epoch": 0.39061918352152875, "grad_norm": 785.1321411132812, "learning_rate": 7.773122014595584e-06, "loss": 169.5685, "step": 47220 }, { "epoch": 0.39070190677089794, "grad_norm": 1930.775146484375, "learning_rate": 7.771948336693983e-06, "loss": 108.4483, "step": 47230 }, { "epoch": 0.3907846300202672, "grad_norm": 515.0844116210938, "learning_rate": 7.770774438241168e-06, "loss": 67.2212, "step": 47240 }, { "epoch": 0.39086735326963645, "grad_norm": 1046.875, "learning_rate": 7.769600319330553e-06, "loss": 122.4751, "step": 47250 }, { "epoch": 0.39095007651900565, "grad_norm": 516.0284423828125, "learning_rate": 7.768425980055548e-06, "loss": 87.5364, "step": 47260 }, { "epoch": 0.3910327997683749, "grad_norm": 1055.4755859375, "learning_rate": 7.767251420509593e-06, "loss": 127.459, "step": 47270 }, { "epoch": 0.39111552301774416, "grad_norm": 1103.45361328125, "learning_rate": 7.766076640786145e-06, "loss": 91.529, "step": 47280 }, { "epoch": 0.39119824626711336, "grad_norm": 751.03759765625, "learning_rate": 7.764901640978671e-06, "loss": 97.0965, "step": 47290 }, { "epoch": 0.3912809695164826, "grad_norm": 849.8720703125, "learning_rate": 7.763726421180664e-06, "loss": 100.8384, "step": 47300 }, { "epoch": 0.39136369276585187, "grad_norm": 1227.1131591796875, "learning_rate": 7.762550981485629e-06, "loss": 85.1875, "step": 47310 }, { "epoch": 0.39144641601522107, "grad_norm": 1593.5885009765625, "learning_rate": 7.76137532198709e-06, "loss": 117.0101, "step": 47320 }, { "epoch": 0.3915291392645903, "grad_norm": 901.8778686523438, "learning_rate": 7.76019944277859e-06, "loss": 132.7024, "step": 47330 }, { "epoch": 0.3916118625139596, "grad_norm": 851.2544555664062, "learning_rate": 7.759023343953689e-06, "loss": 89.3048, "step": 47340 }, { "epoch": 0.3916945857633288, "grad_norm": 605.6761474609375, "learning_rate": 7.757847025605963e-06, "loss": 103.3425, "step": 47350 }, { "epoch": 0.391777309012698, "grad_norm": 1666.5958251953125, "learning_rate": 7.756670487829005e-06, "loss": 112.8522, "step": 47360 }, { "epoch": 0.3918600322620673, "grad_norm": 928.7994384765625, "learning_rate": 7.755493730716428e-06, "loss": 139.6545, "step": 47370 }, { "epoch": 0.3919427555114365, "grad_norm": 1118.8592529296875, "learning_rate": 7.75431675436186e-06, "loss": 101.6983, "step": 47380 }, { "epoch": 0.39202547876080573, "grad_norm": 753.40185546875, "learning_rate": 7.753139558858949e-06, "loss": 111.5847, "step": 47390 }, { "epoch": 0.39210820201017493, "grad_norm": 1253.97509765625, "learning_rate": 7.751962144301359e-06, "loss": 101.4553, "step": 47400 }, { "epoch": 0.3921909252595442, "grad_norm": 1033.0867919921875, "learning_rate": 7.75078451078277e-06, "loss": 92.0257, "step": 47410 }, { "epoch": 0.39227364850891344, "grad_norm": 747.4989624023438, "learning_rate": 7.749606658396883e-06, "loss": 100.6043, "step": 47420 }, { "epoch": 0.39235637175828264, "grad_norm": 822.0686645507812, "learning_rate": 7.748428587237412e-06, "loss": 80.0977, "step": 47430 }, { "epoch": 0.3924390950076519, "grad_norm": 1061.1436767578125, "learning_rate": 7.747250297398092e-06, "loss": 120.7229, "step": 47440 }, { "epoch": 0.39252181825702115, "grad_norm": 497.0961608886719, "learning_rate": 7.746071788972675e-06, "loss": 92.1028, "step": 47450 }, { "epoch": 0.39260454150639035, "grad_norm": 747.8739624023438, "learning_rate": 7.744893062054928e-06, "loss": 84.091, "step": 47460 }, { "epoch": 0.3926872647557596, "grad_norm": 1407.6231689453125, "learning_rate": 7.743714116738636e-06, "loss": 96.2375, "step": 47470 }, { "epoch": 0.39276998800512886, "grad_norm": 1389.636962890625, "learning_rate": 7.742534953117607e-06, "loss": 83.5301, "step": 47480 }, { "epoch": 0.39285271125449805, "grad_norm": 827.8368530273438, "learning_rate": 7.741355571285656e-06, "loss": 98.6002, "step": 47490 }, { "epoch": 0.3929354345038673, "grad_norm": 1032.63330078125, "learning_rate": 7.740175971336624e-06, "loss": 111.3695, "step": 47500 }, { "epoch": 0.39301815775323656, "grad_norm": 938.416015625, "learning_rate": 7.738996153364364e-06, "loss": 128.9613, "step": 47510 }, { "epoch": 0.39310088100260576, "grad_norm": 960.8037719726562, "learning_rate": 7.737816117462752e-06, "loss": 96.7483, "step": 47520 }, { "epoch": 0.393183604251975, "grad_norm": 880.8233642578125, "learning_rate": 7.736635863725677e-06, "loss": 112.1702, "step": 47530 }, { "epoch": 0.39326632750134427, "grad_norm": 643.0890502929688, "learning_rate": 7.735455392247044e-06, "loss": 172.7514, "step": 47540 }, { "epoch": 0.39334905075071347, "grad_norm": 481.9482421875, "learning_rate": 7.73427470312078e-06, "loss": 98.0854, "step": 47550 }, { "epoch": 0.3934317740000827, "grad_norm": 7659.08837890625, "learning_rate": 7.733093796440828e-06, "loss": 94.3791, "step": 47560 }, { "epoch": 0.393514497249452, "grad_norm": 611.7446899414062, "learning_rate": 7.731912672301145e-06, "loss": 121.1342, "step": 47570 }, { "epoch": 0.3935972204988212, "grad_norm": 1074.301513671875, "learning_rate": 7.730731330795707e-06, "loss": 107.5944, "step": 47580 }, { "epoch": 0.39367994374819043, "grad_norm": 1130.7880859375, "learning_rate": 7.72954977201851e-06, "loss": 88.7905, "step": 47590 }, { "epoch": 0.3937626669975597, "grad_norm": 427.6048278808594, "learning_rate": 7.728367996063566e-06, "loss": 95.4467, "step": 47600 }, { "epoch": 0.3938453902469289, "grad_norm": 1087.6966552734375, "learning_rate": 7.727186003024902e-06, "loss": 116.7486, "step": 47610 }, { "epoch": 0.39392811349629814, "grad_norm": 677.6015014648438, "learning_rate": 7.726003792996562e-06, "loss": 112.2149, "step": 47620 }, { "epoch": 0.3940108367456674, "grad_norm": 1281.115478515625, "learning_rate": 7.724821366072612e-06, "loss": 112.6385, "step": 47630 }, { "epoch": 0.3940935599950366, "grad_norm": 1624.0828857421875, "learning_rate": 7.723638722347132e-06, "loss": 114.2262, "step": 47640 }, { "epoch": 0.39417628324440585, "grad_norm": 835.2601318359375, "learning_rate": 7.722455861914218e-06, "loss": 87.17, "step": 47650 }, { "epoch": 0.3942590064937751, "grad_norm": 721.5057983398438, "learning_rate": 7.721272784867983e-06, "loss": 112.5632, "step": 47660 }, { "epoch": 0.3943417297431443, "grad_norm": 909.7614135742188, "learning_rate": 7.720089491302565e-06, "loss": 105.2768, "step": 47670 }, { "epoch": 0.39442445299251355, "grad_norm": 641.9271240234375, "learning_rate": 7.718905981312108e-06, "loss": 93.7019, "step": 47680 }, { "epoch": 0.3945071762418828, "grad_norm": 585.551025390625, "learning_rate": 7.71772225499078e-06, "loss": 80.3911, "step": 47690 }, { "epoch": 0.394589899491252, "grad_norm": 783.8123168945312, "learning_rate": 7.716538312432767e-06, "loss": 115.101, "step": 47700 }, { "epoch": 0.39467262274062126, "grad_norm": 644.3832397460938, "learning_rate": 7.715354153732265e-06, "loss": 99.6165, "step": 47710 }, { "epoch": 0.3947553459899905, "grad_norm": 1999.86572265625, "learning_rate": 7.714169778983496e-06, "loss": 117.3061, "step": 47720 }, { "epoch": 0.3948380692393597, "grad_norm": 1040.9405517578125, "learning_rate": 7.712985188280694e-06, "loss": 101.7906, "step": 47730 }, { "epoch": 0.39492079248872897, "grad_norm": 1040.4888916015625, "learning_rate": 7.711800381718111e-06, "loss": 110.293, "step": 47740 }, { "epoch": 0.39500351573809817, "grad_norm": 1044.0440673828125, "learning_rate": 7.710615359390018e-06, "loss": 94.9559, "step": 47750 }, { "epoch": 0.3950862389874674, "grad_norm": 749.12939453125, "learning_rate": 7.7094301213907e-06, "loss": 92.2077, "step": 47760 }, { "epoch": 0.3951689622368367, "grad_norm": 1238.37158203125, "learning_rate": 7.708244667814463e-06, "loss": 100.6183, "step": 47770 }, { "epoch": 0.3952516854862059, "grad_norm": 821.638916015625, "learning_rate": 7.707058998755626e-06, "loss": 109.0208, "step": 47780 }, { "epoch": 0.3953344087355751, "grad_norm": 767.8424072265625, "learning_rate": 7.705873114308529e-06, "loss": 85.0486, "step": 47790 }, { "epoch": 0.3954171319849444, "grad_norm": 756.989501953125, "learning_rate": 7.704687014567524e-06, "loss": 89.8211, "step": 47800 }, { "epoch": 0.3954998552343136, "grad_norm": 1021.226806640625, "learning_rate": 7.703500699626988e-06, "loss": 90.8781, "step": 47810 }, { "epoch": 0.39558257848368283, "grad_norm": 749.4614868164062, "learning_rate": 7.702314169581311e-06, "loss": 103.451, "step": 47820 }, { "epoch": 0.3956653017330521, "grad_norm": 954.7346801757812, "learning_rate": 7.701127424524894e-06, "loss": 97.5412, "step": 47830 }, { "epoch": 0.3957480249824213, "grad_norm": 704.9465942382812, "learning_rate": 7.699940464552166e-06, "loss": 128.5169, "step": 47840 }, { "epoch": 0.39583074823179054, "grad_norm": 926.2518310546875, "learning_rate": 7.698753289757565e-06, "loss": 103.6893, "step": 47850 }, { "epoch": 0.3959134714811598, "grad_norm": 903.693115234375, "learning_rate": 7.69756590023555e-06, "loss": 117.7347, "step": 47860 }, { "epoch": 0.395996194730529, "grad_norm": 576.5755004882812, "learning_rate": 7.696378296080598e-06, "loss": 77.2486, "step": 47870 }, { "epoch": 0.39607891797989825, "grad_norm": 1016.8932495117188, "learning_rate": 7.6951904773872e-06, "loss": 102.5683, "step": 47880 }, { "epoch": 0.3961616412292675, "grad_norm": 649.89013671875, "learning_rate": 7.694002444249863e-06, "loss": 101.7743, "step": 47890 }, { "epoch": 0.3962443644786367, "grad_norm": 2596.306396484375, "learning_rate": 7.692814196763118e-06, "loss": 125.352, "step": 47900 }, { "epoch": 0.39632708772800596, "grad_norm": 1052.6181640625, "learning_rate": 7.691625735021505e-06, "loss": 109.4487, "step": 47910 }, { "epoch": 0.3964098109773752, "grad_norm": 909.6773071289062, "learning_rate": 7.690437059119584e-06, "loss": 132.6711, "step": 47920 }, { "epoch": 0.3964925342267444, "grad_norm": 468.37310791015625, "learning_rate": 7.689248169151935e-06, "loss": 89.8137, "step": 47930 }, { "epoch": 0.39657525747611366, "grad_norm": 1061.165771484375, "learning_rate": 7.68805906521315e-06, "loss": 117.4328, "step": 47940 }, { "epoch": 0.3966579807254829, "grad_norm": 757.1315307617188, "learning_rate": 7.686869747397843e-06, "loss": 101.1938, "step": 47950 }, { "epoch": 0.3967407039748521, "grad_norm": 811.54052734375, "learning_rate": 7.685680215800639e-06, "loss": 98.3037, "step": 47960 }, { "epoch": 0.39682342722422137, "grad_norm": 880.4942626953125, "learning_rate": 7.684490470516185e-06, "loss": 105.742, "step": 47970 }, { "epoch": 0.3969061504735906, "grad_norm": 3485.52685546875, "learning_rate": 7.683300511639149e-06, "loss": 121.3876, "step": 47980 }, { "epoch": 0.3969888737229598, "grad_norm": 794.6721801757812, "learning_rate": 7.682110339264203e-06, "loss": 99.216, "step": 47990 }, { "epoch": 0.3970715969723291, "grad_norm": 1388.62255859375, "learning_rate": 7.680919953486047e-06, "loss": 114.4334, "step": 48000 }, { "epoch": 0.39715432022169833, "grad_norm": 617.267578125, "learning_rate": 7.679729354399395e-06, "loss": 96.2605, "step": 48010 }, { "epoch": 0.39723704347106753, "grad_norm": 577.44189453125, "learning_rate": 7.678538542098974e-06, "loss": 106.764, "step": 48020 }, { "epoch": 0.3973197667204368, "grad_norm": 922.736083984375, "learning_rate": 7.677347516679536e-06, "loss": 94.4455, "step": 48030 }, { "epoch": 0.39740248996980604, "grad_norm": 1115.3370361328125, "learning_rate": 7.676156278235845e-06, "loss": 124.8899, "step": 48040 }, { "epoch": 0.39748521321917524, "grad_norm": 956.1436157226562, "learning_rate": 7.674964826862679e-06, "loss": 114.437, "step": 48050 }, { "epoch": 0.3975679364685445, "grad_norm": 1833.231201171875, "learning_rate": 7.673773162654836e-06, "loss": 127.1646, "step": 48060 }, { "epoch": 0.39765065971791375, "grad_norm": 424.2684631347656, "learning_rate": 7.672581285707135e-06, "loss": 88.6944, "step": 48070 }, { "epoch": 0.39773338296728294, "grad_norm": 1856.982421875, "learning_rate": 7.67138919611441e-06, "loss": 154.6424, "step": 48080 }, { "epoch": 0.3978161062166522, "grad_norm": 907.585205078125, "learning_rate": 7.670196893971502e-06, "loss": 121.6254, "step": 48090 }, { "epoch": 0.3978988294660214, "grad_norm": 1184.2100830078125, "learning_rate": 7.669004379373284e-06, "loss": 86.6673, "step": 48100 }, { "epoch": 0.39798155271539065, "grad_norm": 1260.71875, "learning_rate": 7.667811652414637e-06, "loss": 87.2874, "step": 48110 }, { "epoch": 0.3980642759647599, "grad_norm": 986.4711303710938, "learning_rate": 7.666618713190459e-06, "loss": 98.885, "step": 48120 }, { "epoch": 0.3981469992141291, "grad_norm": 1163.977783203125, "learning_rate": 7.665425561795669e-06, "loss": 86.0785, "step": 48130 }, { "epoch": 0.39822972246349836, "grad_norm": 851.3588256835938, "learning_rate": 7.664232198325198e-06, "loss": 88.372, "step": 48140 }, { "epoch": 0.3983124457128676, "grad_norm": 786.80419921875, "learning_rate": 7.663038622873999e-06, "loss": 83.9941, "step": 48150 }, { "epoch": 0.3983951689622368, "grad_norm": 1034.7119140625, "learning_rate": 7.66184483553704e-06, "loss": 91.0622, "step": 48160 }, { "epoch": 0.39847789221160607, "grad_norm": 1714.0181884765625, "learning_rate": 7.660650836409302e-06, "loss": 88.6052, "step": 48170 }, { "epoch": 0.3985606154609753, "grad_norm": 1282.2288818359375, "learning_rate": 7.65945662558579e-06, "loss": 102.4522, "step": 48180 }, { "epoch": 0.3986433387103445, "grad_norm": 1070.42626953125, "learning_rate": 7.658262203161517e-06, "loss": 120.5579, "step": 48190 }, { "epoch": 0.3987260619597138, "grad_norm": 1412.6171875, "learning_rate": 7.65706756923152e-06, "loss": 88.6738, "step": 48200 }, { "epoch": 0.398808785209083, "grad_norm": 745.8485107421875, "learning_rate": 7.655872723890854e-06, "loss": 116.865, "step": 48210 }, { "epoch": 0.3988915084584522, "grad_norm": 938.5888671875, "learning_rate": 7.654677667234582e-06, "loss": 112.7861, "step": 48220 }, { "epoch": 0.3989742317078215, "grad_norm": 692.1904296875, "learning_rate": 7.65348239935779e-06, "loss": 93.3395, "step": 48230 }, { "epoch": 0.39905695495719073, "grad_norm": 892.9244384765625, "learning_rate": 7.652286920355583e-06, "loss": 123.322, "step": 48240 }, { "epoch": 0.39913967820655993, "grad_norm": 353.6568603515625, "learning_rate": 7.651091230323079e-06, "loss": 75.8525, "step": 48250 }, { "epoch": 0.3992224014559292, "grad_norm": 1054.718017578125, "learning_rate": 7.649895329355411e-06, "loss": 144.2353, "step": 48260 }, { "epoch": 0.39930512470529844, "grad_norm": 854.1619873046875, "learning_rate": 7.648699217547733e-06, "loss": 89.6152, "step": 48270 }, { "epoch": 0.39938784795466764, "grad_norm": 1015.1759033203125, "learning_rate": 7.647502894995215e-06, "loss": 97.5599, "step": 48280 }, { "epoch": 0.3994705712040369, "grad_norm": 691.9119873046875, "learning_rate": 7.646306361793042e-06, "loss": 95.9693, "step": 48290 }, { "epoch": 0.39955329445340615, "grad_norm": 545.0184936523438, "learning_rate": 7.645109618036416e-06, "loss": 89.2513, "step": 48300 }, { "epoch": 0.39963601770277535, "grad_norm": 1180.732666015625, "learning_rate": 7.643912663820559e-06, "loss": 107.0819, "step": 48310 }, { "epoch": 0.3997187409521446, "grad_norm": 634.8629760742188, "learning_rate": 7.642715499240702e-06, "loss": 93.8869, "step": 48320 }, { "epoch": 0.39980146420151386, "grad_norm": 878.6117553710938, "learning_rate": 7.641518124392105e-06, "loss": 117.2257, "step": 48330 }, { "epoch": 0.39988418745088306, "grad_norm": 741.9995727539062, "learning_rate": 7.640320539370032e-06, "loss": 92.1223, "step": 48340 }, { "epoch": 0.3999669107002523, "grad_norm": 693.643310546875, "learning_rate": 7.63912274426977e-06, "loss": 102.7763, "step": 48350 }, { "epoch": 0.40004963394962156, "grad_norm": 747.3237915039062, "learning_rate": 7.637924739186624e-06, "loss": 83.4088, "step": 48360 }, { "epoch": 0.40013235719899076, "grad_norm": 1104.4271240234375, "learning_rate": 7.636726524215913e-06, "loss": 96.2126, "step": 48370 }, { "epoch": 0.40021508044836, "grad_norm": 1066.0506591796875, "learning_rate": 7.635528099452974e-06, "loss": 137.2111, "step": 48380 }, { "epoch": 0.40029780369772927, "grad_norm": 764.3858032226562, "learning_rate": 7.634329464993158e-06, "loss": 99.6301, "step": 48390 }, { "epoch": 0.40038052694709847, "grad_norm": 619.43017578125, "learning_rate": 7.633130620931837e-06, "loss": 95.3814, "step": 48400 }, { "epoch": 0.4004632501964677, "grad_norm": 576.7614135742188, "learning_rate": 7.631931567364398e-06, "loss": 115.1573, "step": 48410 }, { "epoch": 0.400545973445837, "grad_norm": 1004.3753662109375, "learning_rate": 7.630732304386244e-06, "loss": 90.6397, "step": 48420 }, { "epoch": 0.4006286966952062, "grad_norm": 1928.7547607421875, "learning_rate": 7.629532832092792e-06, "loss": 98.6528, "step": 48430 }, { "epoch": 0.40071141994457543, "grad_norm": 1009.5800170898438, "learning_rate": 7.62833315057948e-06, "loss": 105.3359, "step": 48440 }, { "epoch": 0.4007941431939447, "grad_norm": 573.4387817382812, "learning_rate": 7.627133259941762e-06, "loss": 95.6264, "step": 48450 }, { "epoch": 0.4008768664433139, "grad_norm": 1055.6937255859375, "learning_rate": 7.625933160275109e-06, "loss": 98.1826, "step": 48460 }, { "epoch": 0.40095958969268314, "grad_norm": 636.2737426757812, "learning_rate": 7.6247328516750055e-06, "loss": 106.1883, "step": 48470 }, { "epoch": 0.40104231294205234, "grad_norm": 624.5562744140625, "learning_rate": 7.623532334236954e-06, "loss": 115.2045, "step": 48480 }, { "epoch": 0.4011250361914216, "grad_norm": 1036.740234375, "learning_rate": 7.622331608056474e-06, "loss": 100.7731, "step": 48490 }, { "epoch": 0.40120775944079085, "grad_norm": 1890.8753662109375, "learning_rate": 7.621130673229105e-06, "loss": 98.8333, "step": 48500 }, { "epoch": 0.40129048269016004, "grad_norm": 967.46044921875, "learning_rate": 7.619929529850397e-06, "loss": 62.4726, "step": 48510 }, { "epoch": 0.4013732059395293, "grad_norm": 861.5704956054688, "learning_rate": 7.618728178015919e-06, "loss": 94.6595, "step": 48520 }, { "epoch": 0.40145592918889855, "grad_norm": 1017.3840942382812, "learning_rate": 7.617526617821259e-06, "loss": 107.7875, "step": 48530 }, { "epoch": 0.40153865243826775, "grad_norm": 2595.431640625, "learning_rate": 7.616324849362019e-06, "loss": 104.2326, "step": 48540 }, { "epoch": 0.401621375687637, "grad_norm": 767.1851196289062, "learning_rate": 7.615122872733819e-06, "loss": 126.0866, "step": 48550 }, { "epoch": 0.40170409893700626, "grad_norm": 901.1397705078125, "learning_rate": 7.613920688032293e-06, "loss": 120.4315, "step": 48560 }, { "epoch": 0.40178682218637546, "grad_norm": 1408.295166015625, "learning_rate": 7.612718295353094e-06, "loss": 104.612, "step": 48570 }, { "epoch": 0.4018695454357447, "grad_norm": 746.8675537109375, "learning_rate": 7.61151569479189e-06, "loss": 84.4078, "step": 48580 }, { "epoch": 0.40195226868511397, "grad_norm": 925.1431884765625, "learning_rate": 7.610312886444369e-06, "loss": 89.8368, "step": 48590 }, { "epoch": 0.40203499193448317, "grad_norm": 757.10693359375, "learning_rate": 7.60910987040623e-06, "loss": 92.3049, "step": 48600 }, { "epoch": 0.4021177151838524, "grad_norm": 777.216552734375, "learning_rate": 7.607906646773195e-06, "loss": 105.3716, "step": 48610 }, { "epoch": 0.4022004384332217, "grad_norm": 751.5682983398438, "learning_rate": 7.606703215640995e-06, "loss": 92.2909, "step": 48620 }, { "epoch": 0.4022831616825909, "grad_norm": 1239.3206787109375, "learning_rate": 7.605499577105382e-06, "loss": 106.9025, "step": 48630 }, { "epoch": 0.4023658849319601, "grad_norm": 750.9842529296875, "learning_rate": 7.604295731262128e-06, "loss": 115.5005, "step": 48640 }, { "epoch": 0.4024486081813294, "grad_norm": 1378.44677734375, "learning_rate": 7.603091678207013e-06, "loss": 95.4831, "step": 48650 }, { "epoch": 0.4025313314306986, "grad_norm": 883.4937133789062, "learning_rate": 7.60188741803584e-06, "loss": 94.2378, "step": 48660 }, { "epoch": 0.40261405468006783, "grad_norm": 927.4861450195312, "learning_rate": 7.600682950844428e-06, "loss": 97.9098, "step": 48670 }, { "epoch": 0.4026967779294371, "grad_norm": 999.6773071289062, "learning_rate": 7.599478276728607e-06, "loss": 88.1278, "step": 48680 }, { "epoch": 0.4027795011788063, "grad_norm": 981.5151977539062, "learning_rate": 7.5982733957842304e-06, "loss": 96.1045, "step": 48690 }, { "epoch": 0.40286222442817554, "grad_norm": 779.521728515625, "learning_rate": 7.597068308107165e-06, "loss": 96.5194, "step": 48700 }, { "epoch": 0.4029449476775448, "grad_norm": 723.9231567382812, "learning_rate": 7.595863013793292e-06, "loss": 105.2723, "step": 48710 }, { "epoch": 0.403027670926914, "grad_norm": 688.5134887695312, "learning_rate": 7.594657512938513e-06, "loss": 73.8923, "step": 48720 }, { "epoch": 0.40311039417628325, "grad_norm": 1107.6732177734375, "learning_rate": 7.593451805638743e-06, "loss": 84.0132, "step": 48730 }, { "epoch": 0.4031931174256525, "grad_norm": 971.2855834960938, "learning_rate": 7.592245891989914e-06, "loss": 126.4593, "step": 48740 }, { "epoch": 0.4032758406750217, "grad_norm": 906.73388671875, "learning_rate": 7.5910397720879785e-06, "loss": 112.5872, "step": 48750 }, { "epoch": 0.40335856392439096, "grad_norm": 976.5882568359375, "learning_rate": 7.589833446028898e-06, "loss": 113.1635, "step": 48760 }, { "epoch": 0.4034412871737602, "grad_norm": 517.5700073242188, "learning_rate": 7.5886269139086565e-06, "loss": 95.9183, "step": 48770 }, { "epoch": 0.4035240104231294, "grad_norm": 1049.484375, "learning_rate": 7.587420175823252e-06, "loss": 114.5341, "step": 48780 }, { "epoch": 0.40360673367249866, "grad_norm": 1442.658447265625, "learning_rate": 7.586213231868699e-06, "loss": 89.2601, "step": 48790 }, { "epoch": 0.4036894569218679, "grad_norm": 923.6640625, "learning_rate": 7.585006082141028e-06, "loss": 100.5833, "step": 48800 }, { "epoch": 0.4037721801712371, "grad_norm": 882.6138916015625, "learning_rate": 7.583798726736286e-06, "loss": 111.9895, "step": 48810 }, { "epoch": 0.40385490342060637, "grad_norm": 841.87548828125, "learning_rate": 7.5825911657505365e-06, "loss": 110.3644, "step": 48820 }, { "epoch": 0.40393762666997557, "grad_norm": 686.5569458007812, "learning_rate": 7.581383399279863e-06, "loss": 113.7908, "step": 48830 }, { "epoch": 0.4040203499193448, "grad_norm": 1229.239501953125, "learning_rate": 7.580175427420358e-06, "loss": 94.2136, "step": 48840 }, { "epoch": 0.4041030731687141, "grad_norm": 1013.9456176757812, "learning_rate": 7.578967250268137e-06, "loss": 112.4359, "step": 48850 }, { "epoch": 0.4041857964180833, "grad_norm": 843.6847534179688, "learning_rate": 7.577758867919325e-06, "loss": 119.3482, "step": 48860 }, { "epoch": 0.40426851966745253, "grad_norm": 660.8492431640625, "learning_rate": 7.576550280470072e-06, "loss": 105.5132, "step": 48870 }, { "epoch": 0.4043512429168218, "grad_norm": 1110.703125, "learning_rate": 7.5753414880165365e-06, "loss": 117.6248, "step": 48880 }, { "epoch": 0.404433966166191, "grad_norm": 971.7891235351562, "learning_rate": 7.5741324906548996e-06, "loss": 84.8425, "step": 48890 }, { "epoch": 0.40451668941556024, "grad_norm": 674.7775268554688, "learning_rate": 7.572923288481355e-06, "loss": 93.9152, "step": 48900 }, { "epoch": 0.4045994126649295, "grad_norm": 1152.0150146484375, "learning_rate": 7.571713881592109e-06, "loss": 97.321, "step": 48910 }, { "epoch": 0.4046821359142987, "grad_norm": 813.1100463867188, "learning_rate": 7.570504270083394e-06, "loss": 107.887, "step": 48920 }, { "epoch": 0.40476485916366794, "grad_norm": 860.7267456054688, "learning_rate": 7.569294454051452e-06, "loss": 93.7367, "step": 48930 }, { "epoch": 0.4048475824130372, "grad_norm": 641.8156127929688, "learning_rate": 7.568084433592542e-06, "loss": 103.0248, "step": 48940 }, { "epoch": 0.4049303056624064, "grad_norm": 1134.04736328125, "learning_rate": 7.566874208802939e-06, "loss": 101.0284, "step": 48950 }, { "epoch": 0.40501302891177565, "grad_norm": 596.593017578125, "learning_rate": 7.5656637797789335e-06, "loss": 80.7004, "step": 48960 }, { "epoch": 0.4050957521611449, "grad_norm": 791.4570922851562, "learning_rate": 7.564453146616837e-06, "loss": 109.8588, "step": 48970 }, { "epoch": 0.4051784754105141, "grad_norm": 681.3819580078125, "learning_rate": 7.563242309412975e-06, "loss": 105.6946, "step": 48980 }, { "epoch": 0.40526119865988336, "grad_norm": 881.9998168945312, "learning_rate": 7.562031268263686e-06, "loss": 104.3682, "step": 48990 }, { "epoch": 0.4053439219092526, "grad_norm": 985.5377807617188, "learning_rate": 7.5608200232653254e-06, "loss": 146.3788, "step": 49000 }, { "epoch": 0.4054266451586218, "grad_norm": 547.89697265625, "learning_rate": 7.5596085745142654e-06, "loss": 126.0642, "step": 49010 }, { "epoch": 0.40550936840799107, "grad_norm": 1244.3304443359375, "learning_rate": 7.558396922106903e-06, "loss": 105.9497, "step": 49020 }, { "epoch": 0.4055920916573603, "grad_norm": 1159.8284912109375, "learning_rate": 7.557185066139638e-06, "loss": 123.0697, "step": 49030 }, { "epoch": 0.4056748149067295, "grad_norm": 1442.5506591796875, "learning_rate": 7.555973006708892e-06, "loss": 124.4241, "step": 49040 }, { "epoch": 0.4057575381560988, "grad_norm": 1344.17236328125, "learning_rate": 7.554760743911104e-06, "loss": 98.3027, "step": 49050 }, { "epoch": 0.40584026140546803, "grad_norm": 733.8927001953125, "learning_rate": 7.553548277842729e-06, "loss": 90.8391, "step": 49060 }, { "epoch": 0.4059229846548372, "grad_norm": 1737.365966796875, "learning_rate": 7.5523356086002364e-06, "loss": 128.2919, "step": 49070 }, { "epoch": 0.4060057079042065, "grad_norm": 1121.77783203125, "learning_rate": 7.551122736280113e-06, "loss": 114.6872, "step": 49080 }, { "epoch": 0.40608843115357574, "grad_norm": 830.7010498046875, "learning_rate": 7.549909660978863e-06, "loss": 103.0037, "step": 49090 }, { "epoch": 0.40617115440294493, "grad_norm": 950.9368896484375, "learning_rate": 7.548696382793002e-06, "loss": 76.3594, "step": 49100 }, { "epoch": 0.4062538776523142, "grad_norm": 721.6538696289062, "learning_rate": 7.547482901819066e-06, "loss": 101.0682, "step": 49110 }, { "epoch": 0.40633660090168344, "grad_norm": 1179.335693359375, "learning_rate": 7.5462692181536094e-06, "loss": 113.5078, "step": 49120 }, { "epoch": 0.40641932415105264, "grad_norm": 1521.753173828125, "learning_rate": 7.545055331893195e-06, "loss": 127.0566, "step": 49130 }, { "epoch": 0.4065020474004219, "grad_norm": 718.9688720703125, "learning_rate": 7.543841243134409e-06, "loss": 77.3431, "step": 49140 }, { "epoch": 0.40658477064979115, "grad_norm": 1070.0787353515625, "learning_rate": 7.5426269519738495e-06, "loss": 108.8352, "step": 49150 }, { "epoch": 0.40666749389916035, "grad_norm": 514.2880859375, "learning_rate": 7.541412458508133e-06, "loss": 104.5422, "step": 49160 }, { "epoch": 0.4067502171485296, "grad_norm": 774.6307983398438, "learning_rate": 7.54019776283389e-06, "loss": 100.5812, "step": 49170 }, { "epoch": 0.40683294039789886, "grad_norm": 600.9510498046875, "learning_rate": 7.53898286504777e-06, "loss": 75.3506, "step": 49180 }, { "epoch": 0.40691566364726806, "grad_norm": 843.1857299804688, "learning_rate": 7.537767765246436e-06, "loss": 110.9489, "step": 49190 }, { "epoch": 0.4069983868966373, "grad_norm": 957.2750854492188, "learning_rate": 7.536552463526565e-06, "loss": 86.3647, "step": 49200 }, { "epoch": 0.4070811101460065, "grad_norm": 1061.4150390625, "learning_rate": 7.535336959984858e-06, "loss": 127.3531, "step": 49210 }, { "epoch": 0.40716383339537576, "grad_norm": 2062.2001953125, "learning_rate": 7.5341212547180246e-06, "loss": 111.2139, "step": 49220 }, { "epoch": 0.407246556644745, "grad_norm": 768.780517578125, "learning_rate": 7.532905347822792e-06, "loss": 116.6353, "step": 49230 }, { "epoch": 0.4073292798941142, "grad_norm": 780.7527465820312, "learning_rate": 7.5316892393959064e-06, "loss": 91.1807, "step": 49240 }, { "epoch": 0.40741200314348347, "grad_norm": 487.50665283203125, "learning_rate": 7.530472929534126e-06, "loss": 110.9754, "step": 49250 }, { "epoch": 0.4074947263928527, "grad_norm": 578.7011108398438, "learning_rate": 7.529256418334228e-06, "loss": 131.2194, "step": 49260 }, { "epoch": 0.4075774496422219, "grad_norm": 818.4442749023438, "learning_rate": 7.528039705893006e-06, "loss": 98.4813, "step": 49270 }, { "epoch": 0.4076601728915912, "grad_norm": 555.372314453125, "learning_rate": 7.5268227923072665e-06, "loss": 100.5567, "step": 49280 }, { "epoch": 0.40774289614096043, "grad_norm": 1478.378173828125, "learning_rate": 7.525605677673831e-06, "loss": 94.0006, "step": 49290 }, { "epoch": 0.40782561939032963, "grad_norm": 1054.407958984375, "learning_rate": 7.524388362089545e-06, "loss": 110.3638, "step": 49300 }, { "epoch": 0.4079083426396989, "grad_norm": 936.5081176757812, "learning_rate": 7.523170845651263e-06, "loss": 108.1984, "step": 49310 }, { "epoch": 0.40799106588906814, "grad_norm": 714.961181640625, "learning_rate": 7.521953128455856e-06, "loss": 109.1958, "step": 49320 }, { "epoch": 0.40807378913843734, "grad_norm": 850.507080078125, "learning_rate": 7.520735210600213e-06, "loss": 117.9377, "step": 49330 }, { "epoch": 0.4081565123878066, "grad_norm": 815.2796020507812, "learning_rate": 7.519517092181237e-06, "loss": 128.2985, "step": 49340 }, { "epoch": 0.40823923563717585, "grad_norm": 1209.8961181640625, "learning_rate": 7.518298773295849e-06, "loss": 108.5808, "step": 49350 }, { "epoch": 0.40832195888654504, "grad_norm": 1075.5911865234375, "learning_rate": 7.517080254040985e-06, "loss": 87.4483, "step": 49360 }, { "epoch": 0.4084046821359143, "grad_norm": 952.4523315429688, "learning_rate": 7.5158615345136e-06, "loss": 85.5586, "step": 49370 }, { "epoch": 0.40848740538528355, "grad_norm": 688.7626342773438, "learning_rate": 7.514642614810655e-06, "loss": 113.6158, "step": 49380 }, { "epoch": 0.40857012863465275, "grad_norm": 754.0592651367188, "learning_rate": 7.51342349502914e-06, "loss": 95.794, "step": 49390 }, { "epoch": 0.408652851884022, "grad_norm": 1221.88818359375, "learning_rate": 7.512204175266052e-06, "loss": 99.691, "step": 49400 }, { "epoch": 0.40873557513339126, "grad_norm": 781.3279418945312, "learning_rate": 7.510984655618407e-06, "loss": 105.6204, "step": 49410 }, { "epoch": 0.40881829838276046, "grad_norm": 593.7680053710938, "learning_rate": 7.509764936183237e-06, "loss": 82.4356, "step": 49420 }, { "epoch": 0.4089010216321297, "grad_norm": 2055.203369140625, "learning_rate": 7.5085450170575876e-06, "loss": 120.1463, "step": 49430 }, { "epoch": 0.40898374488149897, "grad_norm": 1296.7412109375, "learning_rate": 7.5073248983385265e-06, "loss": 130.0576, "step": 49440 }, { "epoch": 0.40906646813086817, "grad_norm": 754.3132934570312, "learning_rate": 7.50610458012313e-06, "loss": 88.8123, "step": 49450 }, { "epoch": 0.4091491913802374, "grad_norm": 827.0701904296875, "learning_rate": 7.504884062508493e-06, "loss": 85.6176, "step": 49460 }, { "epoch": 0.4092319146296067, "grad_norm": 533.4575805664062, "learning_rate": 7.503663345591726e-06, "loss": 99.3742, "step": 49470 }, { "epoch": 0.4093146378789759, "grad_norm": 914.8536376953125, "learning_rate": 7.502442429469956e-06, "loss": 123.0487, "step": 49480 }, { "epoch": 0.4093973611283451, "grad_norm": 1090.47998046875, "learning_rate": 7.501221314240329e-06, "loss": 110.9421, "step": 49490 }, { "epoch": 0.4094800843777144, "grad_norm": 969.5048828125, "learning_rate": 7.500000000000001e-06, "loss": 132.2819, "step": 49500 }, { "epoch": 0.4095628076270836, "grad_norm": 1003.5017700195312, "learning_rate": 7.4987784868461455e-06, "loss": 125.4059, "step": 49510 }, { "epoch": 0.40964553087645283, "grad_norm": 1233.950927734375, "learning_rate": 7.497556774875953e-06, "loss": 100.2119, "step": 49520 }, { "epoch": 0.4097282541258221, "grad_norm": 638.7094116210938, "learning_rate": 7.496334864186632e-06, "loss": 133.3458, "step": 49530 }, { "epoch": 0.4098109773751913, "grad_norm": 448.5832214355469, "learning_rate": 7.4951127548754025e-06, "loss": 95.0754, "step": 49540 }, { "epoch": 0.40989370062456054, "grad_norm": 1257.3818359375, "learning_rate": 7.4938904470395e-06, "loss": 91.5809, "step": 49550 }, { "epoch": 0.40997642387392974, "grad_norm": 1006.02294921875, "learning_rate": 7.492667940776182e-06, "loss": 102.9834, "step": 49560 }, { "epoch": 0.410059147123299, "grad_norm": 1712.72607421875, "learning_rate": 7.491445236182715e-06, "loss": 103.0404, "step": 49570 }, { "epoch": 0.41014187037266825, "grad_norm": 799.1417236328125, "learning_rate": 7.490222333356384e-06, "loss": 106.2483, "step": 49580 }, { "epoch": 0.41022459362203745, "grad_norm": 1142.0809326171875, "learning_rate": 7.488999232394492e-06, "loss": 151.4166, "step": 49590 }, { "epoch": 0.4103073168714067, "grad_norm": 1388.60693359375, "learning_rate": 7.487775933394353e-06, "loss": 103.9434, "step": 49600 }, { "epoch": 0.41039004012077596, "grad_norm": 835.6099853515625, "learning_rate": 7.4865524364533e-06, "loss": 104.3207, "step": 49610 }, { "epoch": 0.41047276337014516, "grad_norm": 653.7705688476562, "learning_rate": 7.485328741668683e-06, "loss": 83.743, "step": 49620 }, { "epoch": 0.4105554866195144, "grad_norm": 870.46484375, "learning_rate": 7.484104849137862e-06, "loss": 108.1679, "step": 49630 }, { "epoch": 0.41063820986888366, "grad_norm": 839.9007568359375, "learning_rate": 7.482880758958219e-06, "loss": 83.2425, "step": 49640 }, { "epoch": 0.41072093311825286, "grad_norm": 931.4784545898438, "learning_rate": 7.48165647122715e-06, "loss": 114.039, "step": 49650 }, { "epoch": 0.4108036563676221, "grad_norm": 776.007080078125, "learning_rate": 7.480431986042065e-06, "loss": 138.1686, "step": 49660 }, { "epoch": 0.41088637961699137, "grad_norm": 890.8333129882812, "learning_rate": 7.47920730350039e-06, "loss": 129.4647, "step": 49670 }, { "epoch": 0.41096910286636057, "grad_norm": 1296.5220947265625, "learning_rate": 7.477982423699568e-06, "loss": 120.0857, "step": 49680 }, { "epoch": 0.4110518261157298, "grad_norm": 1131.2828369140625, "learning_rate": 7.476757346737057e-06, "loss": 112.3677, "step": 49690 }, { "epoch": 0.4111345493650991, "grad_norm": 1283.157470703125, "learning_rate": 7.47553207271033e-06, "loss": 114.4162, "step": 49700 }, { "epoch": 0.4112172726144683, "grad_norm": 309.3968200683594, "learning_rate": 7.474306601716877e-06, "loss": 89.349, "step": 49710 }, { "epoch": 0.41129999586383753, "grad_norm": 961.7044677734375, "learning_rate": 7.473080933854205e-06, "loss": 81.6981, "step": 49720 }, { "epoch": 0.4113827191132068, "grad_norm": 427.75469970703125, "learning_rate": 7.471855069219831e-06, "loss": 111.079, "step": 49730 }, { "epoch": 0.411465442362576, "grad_norm": 973.0006103515625, "learning_rate": 7.470629007911294e-06, "loss": 99.5809, "step": 49740 }, { "epoch": 0.41154816561194524, "grad_norm": 1471.537109375, "learning_rate": 7.469402750026147e-06, "loss": 130.2198, "step": 49750 }, { "epoch": 0.4116308888613145, "grad_norm": 765.6078491210938, "learning_rate": 7.468176295661955e-06, "loss": 124.7598, "step": 49760 }, { "epoch": 0.4117136121106837, "grad_norm": 918.6041259765625, "learning_rate": 7.466949644916301e-06, "loss": 103.558, "step": 49770 }, { "epoch": 0.41179633536005295, "grad_norm": 933.6361694335938, "learning_rate": 7.465722797886788e-06, "loss": 94.5863, "step": 49780 }, { "epoch": 0.4118790586094222, "grad_norm": 623.9163818359375, "learning_rate": 7.464495754671027e-06, "loss": 86.9486, "step": 49790 }, { "epoch": 0.4119617818587914, "grad_norm": 592.259033203125, "learning_rate": 7.4632685153666505e-06, "loss": 111.5722, "step": 49800 }, { "epoch": 0.41204450510816065, "grad_norm": 430.714599609375, "learning_rate": 7.462041080071301e-06, "loss": 81.418, "step": 49810 }, { "epoch": 0.4121272283575299, "grad_norm": 1098.07373046875, "learning_rate": 7.460813448882643e-06, "loss": 145.5894, "step": 49820 }, { "epoch": 0.4122099516068991, "grad_norm": 630.3162841796875, "learning_rate": 7.459585621898353e-06, "loss": 88.393, "step": 49830 }, { "epoch": 0.41229267485626836, "grad_norm": 783.9848022460938, "learning_rate": 7.4583575992161235e-06, "loss": 73.975, "step": 49840 }, { "epoch": 0.4123753981056376, "grad_norm": 645.27392578125, "learning_rate": 7.457129380933662e-06, "loss": 108.0057, "step": 49850 }, { "epoch": 0.4124581213550068, "grad_norm": 522.0549926757812, "learning_rate": 7.4559009671486906e-06, "loss": 65.7118, "step": 49860 }, { "epoch": 0.41254084460437607, "grad_norm": 706.9224853515625, "learning_rate": 7.454672357958951e-06, "loss": 127.6793, "step": 49870 }, { "epoch": 0.4126235678537453, "grad_norm": 1543.4708251953125, "learning_rate": 7.453443553462198e-06, "loss": 138.2186, "step": 49880 }, { "epoch": 0.4127062911031145, "grad_norm": 1025.1986083984375, "learning_rate": 7.4522145537562015e-06, "loss": 117.5677, "step": 49890 }, { "epoch": 0.4127890143524838, "grad_norm": 611.4989013671875, "learning_rate": 7.450985358938747e-06, "loss": 106.0235, "step": 49900 }, { "epoch": 0.41287173760185303, "grad_norm": 1154.43701171875, "learning_rate": 7.449755969107635e-06, "loss": 113.3347, "step": 49910 }, { "epoch": 0.4129544608512222, "grad_norm": 1076.0106201171875, "learning_rate": 7.4485263843606835e-06, "loss": 104.9749, "step": 49920 }, { "epoch": 0.4130371841005915, "grad_norm": 1215.836669921875, "learning_rate": 7.447296604795726e-06, "loss": 119.1512, "step": 49930 }, { "epoch": 0.4131199073499607, "grad_norm": 1207.3167724609375, "learning_rate": 7.4460666305106084e-06, "loss": 101.0483, "step": 49940 }, { "epoch": 0.41320263059932993, "grad_norm": 1147.7183837890625, "learning_rate": 7.444836461603195e-06, "loss": 101.2153, "step": 49950 }, { "epoch": 0.4132853538486992, "grad_norm": 784.3466796875, "learning_rate": 7.443606098171363e-06, "loss": 86.7002, "step": 49960 }, { "epoch": 0.4133680770980684, "grad_norm": 452.4320068359375, "learning_rate": 7.442375540313012e-06, "loss": 94.4948, "step": 49970 }, { "epoch": 0.41345080034743764, "grad_norm": 911.47021484375, "learning_rate": 7.441144788126045e-06, "loss": 110.6758, "step": 49980 }, { "epoch": 0.4135335235968069, "grad_norm": 1147.6248779296875, "learning_rate": 7.4399138417083925e-06, "loss": 95.634, "step": 49990 }, { "epoch": 0.4136162468461761, "grad_norm": 1253.6624755859375, "learning_rate": 7.438682701157993e-06, "loss": 141.2699, "step": 50000 }, { "epoch": 0.41369897009554535, "grad_norm": 1440.07080078125, "learning_rate": 7.437451366572803e-06, "loss": 109.6948, "step": 50010 }, { "epoch": 0.4137816933449146, "grad_norm": 833.8212280273438, "learning_rate": 7.436219838050793e-06, "loss": 106.7741, "step": 50020 }, { "epoch": 0.4138644165942838, "grad_norm": 999.9931640625, "learning_rate": 7.4349881156899525e-06, "loss": 110.1778, "step": 50030 }, { "epoch": 0.41394713984365306, "grad_norm": 1517.33251953125, "learning_rate": 7.433756199588282e-06, "loss": 106.7463, "step": 50040 }, { "epoch": 0.4140298630930223, "grad_norm": 749.0157470703125, "learning_rate": 7.4325240898438e-06, "loss": 86.2277, "step": 50050 }, { "epoch": 0.4141125863423915, "grad_norm": 1004.859619140625, "learning_rate": 7.4312917865545406e-06, "loss": 101.3041, "step": 50060 }, { "epoch": 0.41419530959176076, "grad_norm": 2136.435546875, "learning_rate": 7.430059289818552e-06, "loss": 109.2253, "step": 50070 }, { "epoch": 0.41427803284113, "grad_norm": 1305.9910888671875, "learning_rate": 7.4288265997338985e-06, "loss": 116.3073, "step": 50080 }, { "epoch": 0.4143607560904992, "grad_norm": 1031.19921875, "learning_rate": 7.427593716398658e-06, "loss": 136.2479, "step": 50090 }, { "epoch": 0.41444347933986847, "grad_norm": 539.9341430664062, "learning_rate": 7.426360639910927e-06, "loss": 86.8462, "step": 50100 }, { "epoch": 0.4145262025892377, "grad_norm": 527.5219116210938, "learning_rate": 7.425127370368815e-06, "loss": 104.5311, "step": 50110 }, { "epoch": 0.4146089258386069, "grad_norm": 836.485107421875, "learning_rate": 7.423893907870449e-06, "loss": 125.8744, "step": 50120 }, { "epoch": 0.4146916490879762, "grad_norm": 514.3257446289062, "learning_rate": 7.422660252513969e-06, "loss": 100.2241, "step": 50130 }, { "epoch": 0.41477437233734543, "grad_norm": 773.184814453125, "learning_rate": 7.421426404397531e-06, "loss": 82.5671, "step": 50140 }, { "epoch": 0.41485709558671463, "grad_norm": 1118.6546630859375, "learning_rate": 7.420192363619305e-06, "loss": 116.5798, "step": 50150 }, { "epoch": 0.4149398188360839, "grad_norm": 1059.586669921875, "learning_rate": 7.418958130277483e-06, "loss": 94.7955, "step": 50160 }, { "epoch": 0.41502254208545314, "grad_norm": 468.1589660644531, "learning_rate": 7.417723704470261e-06, "loss": 92.9759, "step": 50170 }, { "epoch": 0.41510526533482234, "grad_norm": 2084.325927734375, "learning_rate": 7.4164890862958615e-06, "loss": 128.8667, "step": 50180 }, { "epoch": 0.4151879885841916, "grad_norm": 834.3111572265625, "learning_rate": 7.415254275852515e-06, "loss": 103.7669, "step": 50190 }, { "epoch": 0.41527071183356085, "grad_norm": 869.2041625976562, "learning_rate": 7.414019273238471e-06, "loss": 88.4897, "step": 50200 }, { "epoch": 0.41535343508293004, "grad_norm": 1044.08056640625, "learning_rate": 7.4127840785519915e-06, "loss": 98.155, "step": 50210 }, { "epoch": 0.4154361583322993, "grad_norm": 967.443603515625, "learning_rate": 7.411548691891357e-06, "loss": 115.068, "step": 50220 }, { "epoch": 0.41551888158166855, "grad_norm": 534.2567138671875, "learning_rate": 7.41031311335486e-06, "loss": 106.5501, "step": 50230 }, { "epoch": 0.41560160483103775, "grad_norm": 1074.7396240234375, "learning_rate": 7.409077343040809e-06, "loss": 153.6957, "step": 50240 }, { "epoch": 0.415684328080407, "grad_norm": 657.9391479492188, "learning_rate": 7.407841381047533e-06, "loss": 99.6328, "step": 50250 }, { "epoch": 0.41576705132977626, "grad_norm": 750.8782958984375, "learning_rate": 7.406605227473367e-06, "loss": 88.5056, "step": 50260 }, { "epoch": 0.41584977457914546, "grad_norm": 912.8604125976562, "learning_rate": 7.405368882416668e-06, "loss": 127.2815, "step": 50270 }, { "epoch": 0.4159324978285147, "grad_norm": 722.0275268554688, "learning_rate": 7.404132345975806e-06, "loss": 89.6634, "step": 50280 }, { "epoch": 0.4160152210778839, "grad_norm": 503.6108093261719, "learning_rate": 7.4028956182491665e-06, "loss": 65.2768, "step": 50290 }, { "epoch": 0.41609794432725317, "grad_norm": 1528.93212890625, "learning_rate": 7.401658699335151e-06, "loss": 110.4871, "step": 50300 }, { "epoch": 0.4161806675766224, "grad_norm": 742.7732543945312, "learning_rate": 7.400421589332175e-06, "loss": 85.2059, "step": 50310 }, { "epoch": 0.4162633908259916, "grad_norm": 1395.8526611328125, "learning_rate": 7.39918428833867e-06, "loss": 94.5206, "step": 50320 }, { "epoch": 0.4163461140753609, "grad_norm": 978.8078002929688, "learning_rate": 7.397946796453081e-06, "loss": 122.651, "step": 50330 }, { "epoch": 0.41642883732473013, "grad_norm": 738.7698974609375, "learning_rate": 7.39670911377387e-06, "loss": 128.4523, "step": 50340 }, { "epoch": 0.4165115605740993, "grad_norm": 924.0703735351562, "learning_rate": 7.395471240399515e-06, "loss": 100.5796, "step": 50350 }, { "epoch": 0.4165942838234686, "grad_norm": 1406.5579833984375, "learning_rate": 7.394233176428508e-06, "loss": 84.9948, "step": 50360 }, { "epoch": 0.41667700707283784, "grad_norm": 692.1464233398438, "learning_rate": 7.3929949219593545e-06, "loss": 108.0571, "step": 50370 }, { "epoch": 0.41675973032220703, "grad_norm": 935.2982177734375, "learning_rate": 7.391756477090577e-06, "loss": 106.4006, "step": 50380 }, { "epoch": 0.4168424535715763, "grad_norm": 803.556396484375, "learning_rate": 7.3905178419207126e-06, "loss": 91.8345, "step": 50390 }, { "epoch": 0.41692517682094554, "grad_norm": 170.64480590820312, "learning_rate": 7.3892790165483164e-06, "loss": 86.5613, "step": 50400 }, { "epoch": 0.41700790007031474, "grad_norm": 846.9085693359375, "learning_rate": 7.388040001071953e-06, "loss": 116.5208, "step": 50410 }, { "epoch": 0.417090623319684, "grad_norm": 488.96343994140625, "learning_rate": 7.386800795590208e-06, "loss": 84.3048, "step": 50420 }, { "epoch": 0.41717334656905325, "grad_norm": 2128.975830078125, "learning_rate": 7.385561400201675e-06, "loss": 101.9137, "step": 50430 }, { "epoch": 0.41725606981842245, "grad_norm": 886.4940795898438, "learning_rate": 7.384321815004971e-06, "loss": 122.7343, "step": 50440 }, { "epoch": 0.4173387930677917, "grad_norm": 1239.8963623046875, "learning_rate": 7.383082040098723e-06, "loss": 118.5375, "step": 50450 }, { "epoch": 0.41742151631716096, "grad_norm": 598.2474365234375, "learning_rate": 7.381842075581573e-06, "loss": 82.4002, "step": 50460 }, { "epoch": 0.41750423956653016, "grad_norm": 927.5972900390625, "learning_rate": 7.380601921552181e-06, "loss": 96.2848, "step": 50470 }, { "epoch": 0.4175869628158994, "grad_norm": 1360.900146484375, "learning_rate": 7.379361578109218e-06, "loss": 99.3107, "step": 50480 }, { "epoch": 0.41766968606526866, "grad_norm": 637.6759643554688, "learning_rate": 7.378121045351378e-06, "loss": 115.3234, "step": 50490 }, { "epoch": 0.41775240931463786, "grad_norm": 1148.364990234375, "learning_rate": 7.376880323377357e-06, "loss": 106.6024, "step": 50500 }, { "epoch": 0.4178351325640071, "grad_norm": 567.2054443359375, "learning_rate": 7.375639412285877e-06, "loss": 94.4765, "step": 50510 }, { "epoch": 0.41791785581337637, "grad_norm": 742.42236328125, "learning_rate": 7.374398312175674e-06, "loss": 103.2163, "step": 50520 }, { "epoch": 0.41800057906274557, "grad_norm": 698.6530151367188, "learning_rate": 7.373157023145493e-06, "loss": 86.0616, "step": 50530 }, { "epoch": 0.4180833023121148, "grad_norm": 1061.374755859375, "learning_rate": 7.371915545294098e-06, "loss": 74.492, "step": 50540 }, { "epoch": 0.4181660255614841, "grad_norm": 748.8624877929688, "learning_rate": 7.37067387872027e-06, "loss": 75.0185, "step": 50550 }, { "epoch": 0.4182487488108533, "grad_norm": 315.02374267578125, "learning_rate": 7.369432023522801e-06, "loss": 77.1597, "step": 50560 }, { "epoch": 0.41833147206022253, "grad_norm": 981.3681030273438, "learning_rate": 7.3681899798005006e-06, "loss": 118.4615, "step": 50570 }, { "epoch": 0.4184141953095918, "grad_norm": 1849.541259765625, "learning_rate": 7.366947747652191e-06, "loss": 104.9723, "step": 50580 }, { "epoch": 0.418496918558961, "grad_norm": 1334.4669189453125, "learning_rate": 7.365705327176713e-06, "loss": 100.1431, "step": 50590 }, { "epoch": 0.41857964180833024, "grad_norm": 1951.8896484375, "learning_rate": 7.364462718472919e-06, "loss": 142.2957, "step": 50600 }, { "epoch": 0.4186623650576995, "grad_norm": 659.5794067382812, "learning_rate": 7.363219921639677e-06, "loss": 96.1194, "step": 50610 }, { "epoch": 0.4187450883070687, "grad_norm": 960.1915893554688, "learning_rate": 7.361976936775872e-06, "loss": 117.1287, "step": 50620 }, { "epoch": 0.41882781155643795, "grad_norm": 940.3717651367188, "learning_rate": 7.360733763980404e-06, "loss": 110.9005, "step": 50630 }, { "epoch": 0.4189105348058072, "grad_norm": 741.2780151367188, "learning_rate": 7.3594904033521815e-06, "loss": 86.0149, "step": 50640 }, { "epoch": 0.4189932580551764, "grad_norm": 1676.6136474609375, "learning_rate": 7.358246854990138e-06, "loss": 119.8522, "step": 50650 }, { "epoch": 0.41907598130454565, "grad_norm": 1362.4383544921875, "learning_rate": 7.357003118993215e-06, "loss": 125.9308, "step": 50660 }, { "epoch": 0.41915870455391485, "grad_norm": 1395.822998046875, "learning_rate": 7.355759195460371e-06, "loss": 125.4457, "step": 50670 }, { "epoch": 0.4192414278032841, "grad_norm": 754.3294067382812, "learning_rate": 7.354515084490579e-06, "loss": 105.6307, "step": 50680 }, { "epoch": 0.41932415105265336, "grad_norm": 1079.152587890625, "learning_rate": 7.353270786182828e-06, "loss": 95.9782, "step": 50690 }, { "epoch": 0.41940687430202256, "grad_norm": 713.3341674804688, "learning_rate": 7.352026300636121e-06, "loss": 104.3826, "step": 50700 }, { "epoch": 0.4194895975513918, "grad_norm": 879.9957275390625, "learning_rate": 7.350781627949475e-06, "loss": 86.2512, "step": 50710 }, { "epoch": 0.41957232080076107, "grad_norm": 1013.7462158203125, "learning_rate": 7.3495367682219236e-06, "loss": 110.0644, "step": 50720 }, { "epoch": 0.41965504405013027, "grad_norm": 948.6749877929688, "learning_rate": 7.348291721552514e-06, "loss": 145.048, "step": 50730 }, { "epoch": 0.4197377672994995, "grad_norm": 2777.333251953125, "learning_rate": 7.3470464880403105e-06, "loss": 116.8931, "step": 50740 }, { "epoch": 0.4198204905488688, "grad_norm": 1682.1556396484375, "learning_rate": 7.345801067784388e-06, "loss": 94.5308, "step": 50750 }, { "epoch": 0.419903213798238, "grad_norm": 988.3783569335938, "learning_rate": 7.34455546088384e-06, "loss": 100.0454, "step": 50760 }, { "epoch": 0.4199859370476072, "grad_norm": 721.0875244140625, "learning_rate": 7.343309667437775e-06, "loss": 116.8833, "step": 50770 }, { "epoch": 0.4200686602969765, "grad_norm": 832.2625122070312, "learning_rate": 7.3420636875453135e-06, "loss": 77.9705, "step": 50780 }, { "epoch": 0.4201513835463457, "grad_norm": 995.8257446289062, "learning_rate": 7.340817521305595e-06, "loss": 101.0271, "step": 50790 }, { "epoch": 0.42023410679571493, "grad_norm": 935.89404296875, "learning_rate": 7.3395711688177676e-06, "loss": 123.7885, "step": 50800 }, { "epoch": 0.4203168300450842, "grad_norm": 957.3890380859375, "learning_rate": 7.3383246301809985e-06, "loss": 79.4234, "step": 50810 }, { "epoch": 0.4203995532944534, "grad_norm": 1143.3251953125, "learning_rate": 7.337077905494472e-06, "loss": 79.35, "step": 50820 }, { "epoch": 0.42048227654382264, "grad_norm": 617.7476806640625, "learning_rate": 7.335830994857382e-06, "loss": 83.5682, "step": 50830 }, { "epoch": 0.4205649997931919, "grad_norm": 923.8682861328125, "learning_rate": 7.334583898368939e-06, "loss": 83.5606, "step": 50840 }, { "epoch": 0.4206477230425611, "grad_norm": 456.18902587890625, "learning_rate": 7.333336616128369e-06, "loss": 110.9043, "step": 50850 }, { "epoch": 0.42073044629193035, "grad_norm": 629.4192504882812, "learning_rate": 7.332089148234913e-06, "loss": 84.0271, "step": 50860 }, { "epoch": 0.4208131695412996, "grad_norm": 777.4642333984375, "learning_rate": 7.330841494787828e-06, "loss": 94.1915, "step": 50870 }, { "epoch": 0.4208958927906688, "grad_norm": 1021.780029296875, "learning_rate": 7.329593655886382e-06, "loss": 86.0737, "step": 50880 }, { "epoch": 0.42097861604003806, "grad_norm": 1032.098876953125, "learning_rate": 7.3283456316298595e-06, "loss": 83.7801, "step": 50890 }, { "epoch": 0.4210613392894073, "grad_norm": 987.5896606445312, "learning_rate": 7.32709742211756e-06, "loss": 86.3313, "step": 50900 }, { "epoch": 0.4211440625387765, "grad_norm": 799.8026733398438, "learning_rate": 7.325849027448799e-06, "loss": 71.7253, "step": 50910 }, { "epoch": 0.42122678578814576, "grad_norm": 675.6878051757812, "learning_rate": 7.324600447722907e-06, "loss": 79.754, "step": 50920 }, { "epoch": 0.421309509037515, "grad_norm": 578.5625, "learning_rate": 7.323351683039224e-06, "loss": 103.9349, "step": 50930 }, { "epoch": 0.4213922322868842, "grad_norm": 1150.8740234375, "learning_rate": 7.32210273349711e-06, "loss": 101.4098, "step": 50940 }, { "epoch": 0.42147495553625347, "grad_norm": 1209.810546875, "learning_rate": 7.32085359919594e-06, "loss": 112.2176, "step": 50950 }, { "epoch": 0.4215576787856227, "grad_norm": 1046.1680908203125, "learning_rate": 7.3196042802350995e-06, "loss": 81.5384, "step": 50960 }, { "epoch": 0.4216404020349919, "grad_norm": 1647.843017578125, "learning_rate": 7.3183547767139916e-06, "loss": 106.6991, "step": 50970 }, { "epoch": 0.4217231252843612, "grad_norm": 1031.4130859375, "learning_rate": 7.317105088732035e-06, "loss": 102.2982, "step": 50980 }, { "epoch": 0.42180584853373043, "grad_norm": 739.594482421875, "learning_rate": 7.31585521638866e-06, "loss": 83.5706, "step": 50990 }, { "epoch": 0.42188857178309963, "grad_norm": 1355.9227294921875, "learning_rate": 7.314605159783313e-06, "loss": 155.9577, "step": 51000 }, { "epoch": 0.4219712950324689, "grad_norm": 752.1953735351562, "learning_rate": 7.313354919015457e-06, "loss": 87.4393, "step": 51010 }, { "epoch": 0.4220540182818381, "grad_norm": 1794.4256591796875, "learning_rate": 7.312104494184566e-06, "loss": 140.2643, "step": 51020 }, { "epoch": 0.42213674153120734, "grad_norm": 1220.9268798828125, "learning_rate": 7.310853885390133e-06, "loss": 98.7091, "step": 51030 }, { "epoch": 0.4222194647805766, "grad_norm": 2305.75, "learning_rate": 7.309603092731661e-06, "loss": 112.4157, "step": 51040 }, { "epoch": 0.4223021880299458, "grad_norm": 1095.7064208984375, "learning_rate": 7.30835211630867e-06, "loss": 102.6219, "step": 51050 }, { "epoch": 0.42238491127931505, "grad_norm": 678.8933715820312, "learning_rate": 7.3071009562206965e-06, "loss": 88.1793, "step": 51060 }, { "epoch": 0.4224676345286843, "grad_norm": 809.6482543945312, "learning_rate": 7.305849612567287e-06, "loss": 99.8888, "step": 51070 }, { "epoch": 0.4225503577780535, "grad_norm": 1535.9327392578125, "learning_rate": 7.304598085448007e-06, "loss": 119.08, "step": 51080 }, { "epoch": 0.42263308102742275, "grad_norm": 953.1244506835938, "learning_rate": 7.303346374962433e-06, "loss": 86.5312, "step": 51090 }, { "epoch": 0.422715804276792, "grad_norm": 1713.26318359375, "learning_rate": 7.302094481210159e-06, "loss": 103.2178, "step": 51100 }, { "epoch": 0.4227985275261612, "grad_norm": 826.4808959960938, "learning_rate": 7.300842404290792e-06, "loss": 96.0054, "step": 51110 }, { "epoch": 0.42288125077553046, "grad_norm": 1028.5643310546875, "learning_rate": 7.2995901443039554e-06, "loss": 128.5591, "step": 51120 }, { "epoch": 0.4229639740248997, "grad_norm": 840.199462890625, "learning_rate": 7.298337701349285e-06, "loss": 116.7384, "step": 51130 }, { "epoch": 0.4230466972742689, "grad_norm": 672.873779296875, "learning_rate": 7.29708507552643e-06, "loss": 68.3276, "step": 51140 }, { "epoch": 0.42312942052363817, "grad_norm": 383.77313232421875, "learning_rate": 7.295832266935059e-06, "loss": 84.0062, "step": 51150 }, { "epoch": 0.4232121437730074, "grad_norm": 1001.8519287109375, "learning_rate": 7.2945792756748505e-06, "loss": 140.7101, "step": 51160 }, { "epoch": 0.4232948670223766, "grad_norm": 709.9981689453125, "learning_rate": 7.2933261018455005e-06, "loss": 75.1831, "step": 51170 }, { "epoch": 0.4233775902717459, "grad_norm": 675.74658203125, "learning_rate": 7.292072745546716e-06, "loss": 73.5509, "step": 51180 }, { "epoch": 0.42346031352111513, "grad_norm": 886.9816284179688, "learning_rate": 7.290819206878223e-06, "loss": 92.8137, "step": 51190 }, { "epoch": 0.4235430367704843, "grad_norm": 688.6138305664062, "learning_rate": 7.289565485939759e-06, "loss": 109.6997, "step": 51200 }, { "epoch": 0.4236257600198536, "grad_norm": 1372.497802734375, "learning_rate": 7.288311582831078e-06, "loss": 86.5049, "step": 51210 }, { "epoch": 0.42370848326922284, "grad_norm": 632.5243530273438, "learning_rate": 7.2870574976519455e-06, "loss": 73.3826, "step": 51220 }, { "epoch": 0.42379120651859203, "grad_norm": 564.6524658203125, "learning_rate": 7.2858032305021455e-06, "loss": 123.0075, "step": 51230 }, { "epoch": 0.4238739297679613, "grad_norm": 758.579833984375, "learning_rate": 7.28454878148147e-06, "loss": 97.95, "step": 51240 }, { "epoch": 0.42395665301733054, "grad_norm": 1073.8216552734375, "learning_rate": 7.283294150689735e-06, "loss": 88.5506, "step": 51250 }, { "epoch": 0.42403937626669974, "grad_norm": 621.6570434570312, "learning_rate": 7.282039338226763e-06, "loss": 106.7351, "step": 51260 }, { "epoch": 0.424122099516069, "grad_norm": 1591.994384765625, "learning_rate": 7.280784344192393e-06, "loss": 117.9606, "step": 51270 }, { "epoch": 0.42420482276543825, "grad_norm": 811.947265625, "learning_rate": 7.279529168686481e-06, "loss": 83.5623, "step": 51280 }, { "epoch": 0.42428754601480745, "grad_norm": 939.7516479492188, "learning_rate": 7.278273811808894e-06, "loss": 91.11, "step": 51290 }, { "epoch": 0.4243702692641767, "grad_norm": 1210.847900390625, "learning_rate": 7.2770182736595164e-06, "loss": 130.9754, "step": 51300 }, { "epoch": 0.42445299251354596, "grad_norm": 1220.7769775390625, "learning_rate": 7.275762554338244e-06, "loss": 76.7486, "step": 51310 }, { "epoch": 0.42453571576291516, "grad_norm": 1265.53271484375, "learning_rate": 7.2745066539449905e-06, "loss": 95.9141, "step": 51320 }, { "epoch": 0.4246184390122844, "grad_norm": 875.6475219726562, "learning_rate": 7.27325057257968e-06, "loss": 97.6407, "step": 51330 }, { "epoch": 0.42470116226165366, "grad_norm": 693.2019653320312, "learning_rate": 7.271994310342254e-06, "loss": 99.6023, "step": 51340 }, { "epoch": 0.42478388551102286, "grad_norm": 897.9375, "learning_rate": 7.270737867332669e-06, "loss": 104.1033, "step": 51350 }, { "epoch": 0.4248666087603921, "grad_norm": 1381.2633056640625, "learning_rate": 7.2694812436508934e-06, "loss": 98.0639, "step": 51360 }, { "epoch": 0.4249493320097613, "grad_norm": 825.0390625, "learning_rate": 7.268224439396909e-06, "loss": 109.9973, "step": 51370 }, { "epoch": 0.42503205525913057, "grad_norm": 997.9774780273438, "learning_rate": 7.266967454670717e-06, "loss": 115.4315, "step": 51380 }, { "epoch": 0.4251147785084998, "grad_norm": 1014.7069702148438, "learning_rate": 7.265710289572328e-06, "loss": 122.6427, "step": 51390 }, { "epoch": 0.425197501757869, "grad_norm": 1186.5941162109375, "learning_rate": 7.264452944201771e-06, "loss": 79.164, "step": 51400 }, { "epoch": 0.4252802250072383, "grad_norm": 896.6864013671875, "learning_rate": 7.263195418659083e-06, "loss": 94.9668, "step": 51410 }, { "epoch": 0.42536294825660753, "grad_norm": 1269.0845947265625, "learning_rate": 7.261937713044325e-06, "loss": 111.4878, "step": 51420 }, { "epoch": 0.42544567150597673, "grad_norm": 737.5647583007812, "learning_rate": 7.260679827457562e-06, "loss": 144.4097, "step": 51430 }, { "epoch": 0.425528394755346, "grad_norm": 1106.5899658203125, "learning_rate": 7.259421761998881e-06, "loss": 97.2455, "step": 51440 }, { "epoch": 0.42561111800471524, "grad_norm": 867.489013671875, "learning_rate": 7.2581635167683805e-06, "loss": 105.3224, "step": 51450 }, { "epoch": 0.42569384125408444, "grad_norm": 693.6383666992188, "learning_rate": 7.256905091866171e-06, "loss": 101.9842, "step": 51460 }, { "epoch": 0.4257765645034537, "grad_norm": 611.8115844726562, "learning_rate": 7.255646487392382e-06, "loss": 115.6671, "step": 51470 }, { "epoch": 0.42585928775282295, "grad_norm": 1504.69775390625, "learning_rate": 7.254387703447154e-06, "loss": 95.3933, "step": 51480 }, { "epoch": 0.42594201100219214, "grad_norm": 600.2556762695312, "learning_rate": 7.2531287401306435e-06, "loss": 92.5239, "step": 51490 }, { "epoch": 0.4260247342515614, "grad_norm": 407.8628845214844, "learning_rate": 7.251869597543019e-06, "loss": 82.7194, "step": 51500 }, { "epoch": 0.42610745750093065, "grad_norm": 614.1116943359375, "learning_rate": 7.250610275784464e-06, "loss": 93.6232, "step": 51510 }, { "epoch": 0.42619018075029985, "grad_norm": 898.9727172851562, "learning_rate": 7.2493507749551795e-06, "loss": 85.4422, "step": 51520 }, { "epoch": 0.4262729039996691, "grad_norm": 1155.338623046875, "learning_rate": 7.248091095155378e-06, "loss": 118.6169, "step": 51530 }, { "epoch": 0.42635562724903836, "grad_norm": 1159.13916015625, "learning_rate": 7.246831236485283e-06, "loss": 135.8041, "step": 51540 }, { "epoch": 0.42643835049840756, "grad_norm": 928.2463989257812, "learning_rate": 7.245571199045139e-06, "loss": 108.4106, "step": 51550 }, { "epoch": 0.4265210737477768, "grad_norm": 767.41259765625, "learning_rate": 7.244310982935202e-06, "loss": 74.6865, "step": 51560 }, { "epoch": 0.42660379699714607, "grad_norm": 709.4650268554688, "learning_rate": 7.243050588255738e-06, "loss": 76.4944, "step": 51570 }, { "epoch": 0.42668652024651527, "grad_norm": 1164.441162109375, "learning_rate": 7.241790015107034e-06, "loss": 88.2155, "step": 51580 }, { "epoch": 0.4267692434958845, "grad_norm": 1503.4273681640625, "learning_rate": 7.240529263589386e-06, "loss": 126.4441, "step": 51590 }, { "epoch": 0.4268519667452538, "grad_norm": 760.6532592773438, "learning_rate": 7.239268333803109e-06, "loss": 105.8668, "step": 51600 }, { "epoch": 0.426934689994623, "grad_norm": 1101.9364013671875, "learning_rate": 7.2380072258485265e-06, "loss": 92.2242, "step": 51610 }, { "epoch": 0.4270174132439922, "grad_norm": 554.7161254882812, "learning_rate": 7.2367459398259795e-06, "loss": 83.6779, "step": 51620 }, { "epoch": 0.4271001364933615, "grad_norm": 972.8421020507812, "learning_rate": 7.2354844758358234e-06, "loss": 123.7181, "step": 51630 }, { "epoch": 0.4271828597427307, "grad_norm": 1161.73828125, "learning_rate": 7.234222833978427e-06, "loss": 106.6433, "step": 51640 }, { "epoch": 0.42726558299209993, "grad_norm": 1067.09765625, "learning_rate": 7.232961014354175e-06, "loss": 104.9934, "step": 51650 }, { "epoch": 0.4273483062414692, "grad_norm": 1101.327392578125, "learning_rate": 7.23169901706346e-06, "loss": 106.5562, "step": 51660 }, { "epoch": 0.4274310294908384, "grad_norm": 1341.46044921875, "learning_rate": 7.2304368422067e-06, "loss": 134.7537, "step": 51670 }, { "epoch": 0.42751375274020764, "grad_norm": 1436.9295654296875, "learning_rate": 7.2291744898843145e-06, "loss": 98.4549, "step": 51680 }, { "epoch": 0.4275964759895769, "grad_norm": 540.67236328125, "learning_rate": 7.227911960196746e-06, "loss": 86.389, "step": 51690 }, { "epoch": 0.4276791992389461, "grad_norm": 1108.2845458984375, "learning_rate": 7.226649253244448e-06, "loss": 74.8376, "step": 51700 }, { "epoch": 0.42776192248831535, "grad_norm": 964.5218505859375, "learning_rate": 7.225386369127886e-06, "loss": 112.0215, "step": 51710 }, { "epoch": 0.4278446457376846, "grad_norm": 768.393310546875, "learning_rate": 7.224123307947545e-06, "loss": 94.5367, "step": 51720 }, { "epoch": 0.4279273689870538, "grad_norm": 708.0656127929688, "learning_rate": 7.2228600698039205e-06, "loss": 108.4423, "step": 51730 }, { "epoch": 0.42801009223642306, "grad_norm": 1345.224853515625, "learning_rate": 7.221596654797522e-06, "loss": 91.7173, "step": 51740 }, { "epoch": 0.42809281548579226, "grad_norm": 1059.3037109375, "learning_rate": 7.2203330630288714e-06, "loss": 109.0566, "step": 51750 }, { "epoch": 0.4281755387351615, "grad_norm": 975.2772216796875, "learning_rate": 7.21906929459851e-06, "loss": 140.6817, "step": 51760 }, { "epoch": 0.42825826198453076, "grad_norm": 1177.9395751953125, "learning_rate": 7.217805349606988e-06, "loss": 91.187, "step": 51770 }, { "epoch": 0.42834098523389996, "grad_norm": 685.8370971679688, "learning_rate": 7.216541228154875e-06, "loss": 93.8095, "step": 51780 }, { "epoch": 0.4284237084832692, "grad_norm": 1138.31689453125, "learning_rate": 7.215276930342747e-06, "loss": 104.4566, "step": 51790 }, { "epoch": 0.42850643173263847, "grad_norm": 601.1682739257812, "learning_rate": 7.214012456271202e-06, "loss": 130.8603, "step": 51800 }, { "epoch": 0.42858915498200767, "grad_norm": 1000.6312866210938, "learning_rate": 7.212747806040845e-06, "loss": 123.3161, "step": 51810 }, { "epoch": 0.4286718782313769, "grad_norm": 1142.5823974609375, "learning_rate": 7.211482979752302e-06, "loss": 85.4368, "step": 51820 }, { "epoch": 0.4287546014807462, "grad_norm": 816.385986328125, "learning_rate": 7.210217977506207e-06, "loss": 109.9975, "step": 51830 }, { "epoch": 0.4288373247301154, "grad_norm": 944.0142822265625, "learning_rate": 7.208952799403211e-06, "loss": 108.7334, "step": 51840 }, { "epoch": 0.42892004797948463, "grad_norm": 824.8801879882812, "learning_rate": 7.207687445543977e-06, "loss": 82.7929, "step": 51850 }, { "epoch": 0.4290027712288539, "grad_norm": 964.5783081054688, "learning_rate": 7.206421916029187e-06, "loss": 121.5998, "step": 51860 }, { "epoch": 0.4290854944782231, "grad_norm": 1063.656982421875, "learning_rate": 7.205156210959529e-06, "loss": 90.583, "step": 51870 }, { "epoch": 0.42916821772759234, "grad_norm": 3292.513671875, "learning_rate": 7.203890330435715e-06, "loss": 105.8095, "step": 51880 }, { "epoch": 0.4292509409769616, "grad_norm": 703.033447265625, "learning_rate": 7.202624274558458e-06, "loss": 106.7044, "step": 51890 }, { "epoch": 0.4293336642263308, "grad_norm": 669.6672973632812, "learning_rate": 7.201358043428499e-06, "loss": 89.5573, "step": 51900 }, { "epoch": 0.42941638747570005, "grad_norm": 647.8473510742188, "learning_rate": 7.200091637146582e-06, "loss": 99.5425, "step": 51910 }, { "epoch": 0.4294991107250693, "grad_norm": 1656.5313720703125, "learning_rate": 7.198825055813471e-06, "loss": 120.4202, "step": 51920 }, { "epoch": 0.4295818339744385, "grad_norm": 755.7421875, "learning_rate": 7.197558299529941e-06, "loss": 74.5723, "step": 51930 }, { "epoch": 0.42966455722380775, "grad_norm": 1194.8021240234375, "learning_rate": 7.196291368396784e-06, "loss": 104.8849, "step": 51940 }, { "epoch": 0.429747280473177, "grad_norm": 810.8287353515625, "learning_rate": 7.1950242625148e-06, "loss": 109.7433, "step": 51950 }, { "epoch": 0.4298300037225462, "grad_norm": 5541.013671875, "learning_rate": 7.1937569819848115e-06, "loss": 117.9274, "step": 51960 }, { "epoch": 0.42991272697191546, "grad_norm": 1193.3897705078125, "learning_rate": 7.192489526907646e-06, "loss": 112.9372, "step": 51970 }, { "epoch": 0.4299954502212847, "grad_norm": 1110.0302734375, "learning_rate": 7.191221897384153e-06, "loss": 120.9369, "step": 51980 }, { "epoch": 0.4300781734706539, "grad_norm": 1447.90771484375, "learning_rate": 7.189954093515189e-06, "loss": 142.4958, "step": 51990 }, { "epoch": 0.43016089672002317, "grad_norm": 1462.6444091796875, "learning_rate": 7.188686115401628e-06, "loss": 127.5024, "step": 52000 }, { "epoch": 0.4302436199693924, "grad_norm": 1592.32666015625, "learning_rate": 7.187417963144358e-06, "loss": 102.1783, "step": 52010 }, { "epoch": 0.4303263432187616, "grad_norm": 356.8098449707031, "learning_rate": 7.18614963684428e-06, "loss": 94.6831, "step": 52020 }, { "epoch": 0.4304090664681309, "grad_norm": 762.9508666992188, "learning_rate": 7.184881136602309e-06, "loss": 117.5537, "step": 52030 }, { "epoch": 0.43049178971750013, "grad_norm": 614.644775390625, "learning_rate": 7.183612462519371e-06, "loss": 120.3886, "step": 52040 }, { "epoch": 0.4305745129668693, "grad_norm": 884.6917724609375, "learning_rate": 7.182343614696412e-06, "loss": 87.1297, "step": 52050 }, { "epoch": 0.4306572362162386, "grad_norm": 1503.93603515625, "learning_rate": 7.181074593234387e-06, "loss": 102.104, "step": 52060 }, { "epoch": 0.43073995946560784, "grad_norm": 431.5899963378906, "learning_rate": 7.179805398234266e-06, "loss": 93.1396, "step": 52070 }, { "epoch": 0.43082268271497703, "grad_norm": 1156.7777099609375, "learning_rate": 7.178536029797035e-06, "loss": 113.4172, "step": 52080 }, { "epoch": 0.4309054059643463, "grad_norm": 680.9636840820312, "learning_rate": 7.177266488023688e-06, "loss": 92.4326, "step": 52090 }, { "epoch": 0.4309881292137155, "grad_norm": 1016.119873046875, "learning_rate": 7.17599677301524e-06, "loss": 83.3582, "step": 52100 }, { "epoch": 0.43107085246308474, "grad_norm": 625.1591796875, "learning_rate": 7.174726884872716e-06, "loss": 85.9998, "step": 52110 }, { "epoch": 0.431153575712454, "grad_norm": 1880.2071533203125, "learning_rate": 7.173456823697154e-06, "loss": 148.4656, "step": 52120 }, { "epoch": 0.4312362989618232, "grad_norm": 559.7216796875, "learning_rate": 7.172186589589607e-06, "loss": 117.4733, "step": 52130 }, { "epoch": 0.43131902221119245, "grad_norm": 878.204833984375, "learning_rate": 7.170916182651141e-06, "loss": 72.1657, "step": 52140 }, { "epoch": 0.4314017454605617, "grad_norm": 785.7388916015625, "learning_rate": 7.1696456029828386e-06, "loss": 78.2875, "step": 52150 }, { "epoch": 0.4314844687099309, "grad_norm": 960.450439453125, "learning_rate": 7.168374850685794e-06, "loss": 99.7961, "step": 52160 }, { "epoch": 0.43156719195930016, "grad_norm": 1000.7466430664062, "learning_rate": 7.167103925861113e-06, "loss": 113.2332, "step": 52170 }, { "epoch": 0.4316499152086694, "grad_norm": 923.0742797851562, "learning_rate": 7.165832828609918e-06, "loss": 108.3951, "step": 52180 }, { "epoch": 0.4317326384580386, "grad_norm": 1476.0849609375, "learning_rate": 7.164561559033344e-06, "loss": 104.3691, "step": 52190 }, { "epoch": 0.43181536170740786, "grad_norm": 656.1659545898438, "learning_rate": 7.163290117232542e-06, "loss": 111.1306, "step": 52200 }, { "epoch": 0.4318980849567771, "grad_norm": 1209.8184814453125, "learning_rate": 7.162018503308674e-06, "loss": 105.6424, "step": 52210 }, { "epoch": 0.4319808082061463, "grad_norm": 780.0021362304688, "learning_rate": 7.1607467173629145e-06, "loss": 90.2464, "step": 52220 }, { "epoch": 0.43206353145551557, "grad_norm": 785.8810424804688, "learning_rate": 7.1594747594964564e-06, "loss": 123.5292, "step": 52230 }, { "epoch": 0.4321462547048848, "grad_norm": 1079.1829833984375, "learning_rate": 7.1582026298105e-06, "loss": 79.3976, "step": 52240 }, { "epoch": 0.432228977954254, "grad_norm": 1131.198974609375, "learning_rate": 7.156930328406268e-06, "loss": 137.1325, "step": 52250 }, { "epoch": 0.4323117012036233, "grad_norm": 1154.282470703125, "learning_rate": 7.1556578553849875e-06, "loss": 78.7498, "step": 52260 }, { "epoch": 0.43239442445299253, "grad_norm": 783.6915283203125, "learning_rate": 7.154385210847905e-06, "loss": 96.9286, "step": 52270 }, { "epoch": 0.43247714770236173, "grad_norm": 1002.9415893554688, "learning_rate": 7.153112394896279e-06, "loss": 101.9183, "step": 52280 }, { "epoch": 0.432559870951731, "grad_norm": 805.485595703125, "learning_rate": 7.15183940763138e-06, "loss": 84.4508, "step": 52290 }, { "epoch": 0.43264259420110024, "grad_norm": 972.188720703125, "learning_rate": 7.150566249154496e-06, "loss": 112.0686, "step": 52300 }, { "epoch": 0.43272531745046944, "grad_norm": 694.895263671875, "learning_rate": 7.149292919566924e-06, "loss": 86.5633, "step": 52310 }, { "epoch": 0.4328080406998387, "grad_norm": 507.7707214355469, "learning_rate": 7.148019418969979e-06, "loss": 108.7999, "step": 52320 }, { "epoch": 0.43289076394920795, "grad_norm": 928.7743530273438, "learning_rate": 7.146745747464987e-06, "loss": 133.0144, "step": 52330 }, { "epoch": 0.43297348719857714, "grad_norm": 888.1306762695312, "learning_rate": 7.145471905153288e-06, "loss": 74.0938, "step": 52340 }, { "epoch": 0.4330562104479464, "grad_norm": 602.6085815429688, "learning_rate": 7.1441978921362365e-06, "loss": 106.0087, "step": 52350 }, { "epoch": 0.43313893369731565, "grad_norm": 739.8253784179688, "learning_rate": 7.142923708515199e-06, "loss": 130.5773, "step": 52360 }, { "epoch": 0.43322165694668485, "grad_norm": 561.8637084960938, "learning_rate": 7.141649354391556e-06, "loss": 116.5679, "step": 52370 }, { "epoch": 0.4333043801960541, "grad_norm": 635.5332641601562, "learning_rate": 7.140374829866703e-06, "loss": 102.7501, "step": 52380 }, { "epoch": 0.43338710344542336, "grad_norm": 849.3636474609375, "learning_rate": 7.1391001350420486e-06, "loss": 112.318, "step": 52390 }, { "epoch": 0.43346982669479256, "grad_norm": 968.818603515625, "learning_rate": 7.137825270019012e-06, "loss": 116.6222, "step": 52400 }, { "epoch": 0.4335525499441618, "grad_norm": 879.7605590820312, "learning_rate": 7.1365502348990315e-06, "loss": 72.7146, "step": 52410 }, { "epoch": 0.43363527319353107, "grad_norm": 974.20947265625, "learning_rate": 7.135275029783554e-06, "loss": 87.4071, "step": 52420 }, { "epoch": 0.43371799644290027, "grad_norm": 898.2373046875, "learning_rate": 7.133999654774041e-06, "loss": 96.0373, "step": 52430 }, { "epoch": 0.4338007196922695, "grad_norm": 1289.7435302734375, "learning_rate": 7.13272410997197e-06, "loss": 121.3297, "step": 52440 }, { "epoch": 0.4338834429416388, "grad_norm": 406.2857666015625, "learning_rate": 7.13144839547883e-06, "loss": 72.3889, "step": 52450 }, { "epoch": 0.433966166191008, "grad_norm": 1770.5550537109375, "learning_rate": 7.130172511396123e-06, "loss": 157.1078, "step": 52460 }, { "epoch": 0.43404888944037723, "grad_norm": 924.5420532226562, "learning_rate": 7.128896457825364e-06, "loss": 94.4494, "step": 52470 }, { "epoch": 0.4341316126897464, "grad_norm": 1168.6419677734375, "learning_rate": 7.127620234868085e-06, "loss": 151.9046, "step": 52480 }, { "epoch": 0.4342143359391157, "grad_norm": 923.4803466796875, "learning_rate": 7.126343842625828e-06, "loss": 103.1586, "step": 52490 }, { "epoch": 0.43429705918848494, "grad_norm": 849.2039794921875, "learning_rate": 7.1250672812001505e-06, "loss": 98.9931, "step": 52500 }, { "epoch": 0.43437978243785413, "grad_norm": 1026.3223876953125, "learning_rate": 7.123790550692624e-06, "loss": 81.8112, "step": 52510 }, { "epoch": 0.4344625056872234, "grad_norm": 1015.2179565429688, "learning_rate": 7.1225136512048275e-06, "loss": 98.0542, "step": 52520 }, { "epoch": 0.43454522893659264, "grad_norm": 756.0176391601562, "learning_rate": 7.1212365828383615e-06, "loss": 143.049, "step": 52530 }, { "epoch": 0.43462795218596184, "grad_norm": 919.13427734375, "learning_rate": 7.119959345694835e-06, "loss": 89.8264, "step": 52540 }, { "epoch": 0.4347106754353311, "grad_norm": 693.4651489257812, "learning_rate": 7.118681939875875e-06, "loss": 129.897, "step": 52550 }, { "epoch": 0.43479339868470035, "grad_norm": 1032.639404296875, "learning_rate": 7.117404365483116e-06, "loss": 109.7115, "step": 52560 }, { "epoch": 0.43487612193406955, "grad_norm": 644.7308959960938, "learning_rate": 7.116126622618207e-06, "loss": 121.4155, "step": 52570 }, { "epoch": 0.4349588451834388, "grad_norm": 1076.7471923828125, "learning_rate": 7.114848711382816e-06, "loss": 105.8533, "step": 52580 }, { "epoch": 0.43504156843280806, "grad_norm": 682.4072265625, "learning_rate": 7.1135706318786195e-06, "loss": 126.4976, "step": 52590 }, { "epoch": 0.43512429168217726, "grad_norm": 334.1803894042969, "learning_rate": 7.112292384207306e-06, "loss": 72.7947, "step": 52600 }, { "epoch": 0.4352070149315465, "grad_norm": 501.4839172363281, "learning_rate": 7.111013968470581e-06, "loss": 88.0988, "step": 52610 }, { "epoch": 0.43528973818091576, "grad_norm": 1030.7449951171875, "learning_rate": 7.109735384770166e-06, "loss": 92.1345, "step": 52620 }, { "epoch": 0.43537246143028496, "grad_norm": 1418.4031982421875, "learning_rate": 7.108456633207787e-06, "loss": 137.2343, "step": 52630 }, { "epoch": 0.4354551846796542, "grad_norm": 1056.733154296875, "learning_rate": 7.10717771388519e-06, "loss": 122.0539, "step": 52640 }, { "epoch": 0.43553790792902347, "grad_norm": 567.1400756835938, "learning_rate": 7.105898626904134e-06, "loss": 97.4046, "step": 52650 }, { "epoch": 0.43562063117839267, "grad_norm": 557.343017578125, "learning_rate": 7.104619372366387e-06, "loss": 97.5606, "step": 52660 }, { "epoch": 0.4357033544277619, "grad_norm": 663.3614501953125, "learning_rate": 7.103339950373737e-06, "loss": 78.0228, "step": 52670 }, { "epoch": 0.4357860776771312, "grad_norm": 726.0701293945312, "learning_rate": 7.102060361027981e-06, "loss": 122.9625, "step": 52680 }, { "epoch": 0.4358688009265004, "grad_norm": 833.1370239257812, "learning_rate": 7.100780604430928e-06, "loss": 92.9005, "step": 52690 }, { "epoch": 0.43595152417586963, "grad_norm": 1035.490478515625, "learning_rate": 7.099500680684404e-06, "loss": 82.178, "step": 52700 }, { "epoch": 0.4360342474252389, "grad_norm": 721.7255859375, "learning_rate": 7.0982205898902444e-06, "loss": 134.4474, "step": 52710 }, { "epoch": 0.4361169706746081, "grad_norm": 872.9884033203125, "learning_rate": 7.096940332150305e-06, "loss": 112.2354, "step": 52720 }, { "epoch": 0.43619969392397734, "grad_norm": 504.2191162109375, "learning_rate": 7.095659907566446e-06, "loss": 69.3615, "step": 52730 }, { "epoch": 0.4362824171733466, "grad_norm": 1016.4193725585938, "learning_rate": 7.094379316240545e-06, "loss": 88.9207, "step": 52740 }, { "epoch": 0.4363651404227158, "grad_norm": 972.1260986328125, "learning_rate": 7.093098558274494e-06, "loss": 104.1136, "step": 52750 }, { "epoch": 0.43644786367208505, "grad_norm": 1002.4033203125, "learning_rate": 7.091817633770197e-06, "loss": 94.7899, "step": 52760 }, { "epoch": 0.4365305869214543, "grad_norm": 880.1514892578125, "learning_rate": 7.090536542829571e-06, "loss": 87.8467, "step": 52770 }, { "epoch": 0.4366133101708235, "grad_norm": 620.6061401367188, "learning_rate": 7.089255285554546e-06, "loss": 136.4645, "step": 52780 }, { "epoch": 0.43669603342019275, "grad_norm": 1144.162353515625, "learning_rate": 7.087973862047067e-06, "loss": 100.7308, "step": 52790 }, { "epoch": 0.436778756669562, "grad_norm": 848.2889404296875, "learning_rate": 7.08669227240909e-06, "loss": 87.7345, "step": 52800 }, { "epoch": 0.4368614799189312, "grad_norm": 812.3153686523438, "learning_rate": 7.085410516742586e-06, "loss": 101.8244, "step": 52810 }, { "epoch": 0.43694420316830046, "grad_norm": 872.6069946289062, "learning_rate": 7.084128595149538e-06, "loss": 104.2199, "step": 52820 }, { "epoch": 0.43702692641766966, "grad_norm": 897.1786499023438, "learning_rate": 7.082846507731942e-06, "loss": 96.152, "step": 52830 }, { "epoch": 0.4371096496670389, "grad_norm": 631.2103271484375, "learning_rate": 7.081564254591809e-06, "loss": 101.3164, "step": 52840 }, { "epoch": 0.43719237291640817, "grad_norm": 781.96044921875, "learning_rate": 7.08028183583116e-06, "loss": 114.7007, "step": 52850 }, { "epoch": 0.43727509616577737, "grad_norm": 995.4803466796875, "learning_rate": 7.078999251552034e-06, "loss": 98.927, "step": 52860 }, { "epoch": 0.4373578194151466, "grad_norm": 841.7239990234375, "learning_rate": 7.077716501856478e-06, "loss": 106.8644, "step": 52870 }, { "epoch": 0.4374405426645159, "grad_norm": 455.7330322265625, "learning_rate": 7.076433586846555e-06, "loss": 119.5307, "step": 52880 }, { "epoch": 0.4375232659138851, "grad_norm": 1019.272216796875, "learning_rate": 7.075150506624342e-06, "loss": 120.9018, "step": 52890 }, { "epoch": 0.4376059891632543, "grad_norm": 783.7548828125, "learning_rate": 7.073867261291926e-06, "loss": 107.6927, "step": 52900 }, { "epoch": 0.4376887124126236, "grad_norm": 483.0557556152344, "learning_rate": 7.0725838509514115e-06, "loss": 57.1351, "step": 52910 }, { "epoch": 0.4377714356619928, "grad_norm": 1054.7470703125, "learning_rate": 7.07130027570491e-06, "loss": 99.6627, "step": 52920 }, { "epoch": 0.43785415891136203, "grad_norm": 716.9881591796875, "learning_rate": 7.070016535654551e-06, "loss": 85.3958, "step": 52930 }, { "epoch": 0.4379368821607313, "grad_norm": 1354.7027587890625, "learning_rate": 7.068732630902479e-06, "loss": 107.1556, "step": 52940 }, { "epoch": 0.4380196054101005, "grad_norm": 1029.6153564453125, "learning_rate": 7.067448561550844e-06, "loss": 92.6526, "step": 52950 }, { "epoch": 0.43810232865946974, "grad_norm": 1133.869873046875, "learning_rate": 7.066164327701815e-06, "loss": 149.094, "step": 52960 }, { "epoch": 0.438185051908839, "grad_norm": 1119.0980224609375, "learning_rate": 7.064879929457573e-06, "loss": 143.9678, "step": 52970 }, { "epoch": 0.4382677751582082, "grad_norm": 643.8549194335938, "learning_rate": 7.063595366920314e-06, "loss": 76.5045, "step": 52980 }, { "epoch": 0.43835049840757745, "grad_norm": 1303.5244140625, "learning_rate": 7.062310640192239e-06, "loss": 111.4981, "step": 52990 }, { "epoch": 0.4384332216569467, "grad_norm": 862.070556640625, "learning_rate": 7.061025749375572e-06, "loss": 100.7834, "step": 53000 }, { "epoch": 0.4385159449063159, "grad_norm": 776.7830810546875, "learning_rate": 7.059740694572545e-06, "loss": 103.979, "step": 53010 }, { "epoch": 0.43859866815568516, "grad_norm": 449.9796447753906, "learning_rate": 7.058455475885405e-06, "loss": 85.2324, "step": 53020 }, { "epoch": 0.4386813914050544, "grad_norm": 1027.198486328125, "learning_rate": 7.05717009341641e-06, "loss": 93.9646, "step": 53030 }, { "epoch": 0.4387641146544236, "grad_norm": 1561.4610595703125, "learning_rate": 7.05588454726783e-06, "loss": 98.5875, "step": 53040 }, { "epoch": 0.43884683790379286, "grad_norm": 886.0679931640625, "learning_rate": 7.054598837541951e-06, "loss": 91.799, "step": 53050 }, { "epoch": 0.4389295611531621, "grad_norm": 822.8799438476562, "learning_rate": 7.053312964341075e-06, "loss": 96.1325, "step": 53060 }, { "epoch": 0.4390122844025313, "grad_norm": 1466.96337890625, "learning_rate": 7.052026927767508e-06, "loss": 125.8915, "step": 53070 }, { "epoch": 0.43909500765190057, "grad_norm": 1202.5977783203125, "learning_rate": 7.050740727923576e-06, "loss": 100.9376, "step": 53080 }, { "epoch": 0.4391777309012698, "grad_norm": 834.5575561523438, "learning_rate": 7.049454364911615e-06, "loss": 100.5369, "step": 53090 }, { "epoch": 0.439260454150639, "grad_norm": 589.8638916015625, "learning_rate": 7.048167838833977e-06, "loss": 94.5479, "step": 53100 }, { "epoch": 0.4393431774000083, "grad_norm": 722.2205200195312, "learning_rate": 7.046881149793026e-06, "loss": 94.6359, "step": 53110 }, { "epoch": 0.43942590064937753, "grad_norm": 706.8112182617188, "learning_rate": 7.045594297891133e-06, "loss": 80.4411, "step": 53120 }, { "epoch": 0.43950862389874673, "grad_norm": 1541.61279296875, "learning_rate": 7.04430728323069e-06, "loss": 126.8126, "step": 53130 }, { "epoch": 0.439591347148116, "grad_norm": 1694.7259521484375, "learning_rate": 7.043020105914098e-06, "loss": 112.2094, "step": 53140 }, { "epoch": 0.43967407039748524, "grad_norm": 1049.023193359375, "learning_rate": 7.041732766043775e-06, "loss": 96.9257, "step": 53150 }, { "epoch": 0.43975679364685444, "grad_norm": 928.5380859375, "learning_rate": 7.040445263722145e-06, "loss": 90.1928, "step": 53160 }, { "epoch": 0.4398395168962237, "grad_norm": 517.4678344726562, "learning_rate": 7.039157599051648e-06, "loss": 100.6094, "step": 53170 }, { "epoch": 0.43992224014559295, "grad_norm": 1040.879638671875, "learning_rate": 7.037869772134741e-06, "loss": 93.181, "step": 53180 }, { "epoch": 0.44000496339496215, "grad_norm": 1226.857421875, "learning_rate": 7.036581783073888e-06, "loss": 139.1528, "step": 53190 }, { "epoch": 0.4400876866443314, "grad_norm": 2283.062744140625, "learning_rate": 7.035293631971569e-06, "loss": 121.1719, "step": 53200 }, { "epoch": 0.4401704098937006, "grad_norm": 1155.0572509765625, "learning_rate": 7.034005318930277e-06, "loss": 86.4772, "step": 53210 }, { "epoch": 0.44025313314306985, "grad_norm": 1117.125244140625, "learning_rate": 7.032716844052517e-06, "loss": 111.3512, "step": 53220 }, { "epoch": 0.4403358563924391, "grad_norm": 1401.2935791015625, "learning_rate": 7.031428207440807e-06, "loss": 122.4468, "step": 53230 }, { "epoch": 0.4404185796418083, "grad_norm": 719.4153442382812, "learning_rate": 7.030139409197676e-06, "loss": 86.415, "step": 53240 }, { "epoch": 0.44050130289117756, "grad_norm": 802.9329833984375, "learning_rate": 7.02885044942567e-06, "loss": 161.6186, "step": 53250 }, { "epoch": 0.4405840261405468, "grad_norm": 720.5238037109375, "learning_rate": 7.027561328227345e-06, "loss": 109.4637, "step": 53260 }, { "epoch": 0.440666749389916, "grad_norm": 786.7742919921875, "learning_rate": 7.02627204570527e-06, "loss": 113.8812, "step": 53270 }, { "epoch": 0.44074947263928527, "grad_norm": 449.480712890625, "learning_rate": 7.024982601962027e-06, "loss": 94.8391, "step": 53280 }, { "epoch": 0.4408321958886545, "grad_norm": 934.8798217773438, "learning_rate": 7.023692997100213e-06, "loss": 79.2688, "step": 53290 }, { "epoch": 0.4409149191380237, "grad_norm": 1120.0533447265625, "learning_rate": 7.0224032312224345e-06, "loss": 91.2629, "step": 53300 }, { "epoch": 0.440997642387393, "grad_norm": 1041.491943359375, "learning_rate": 7.021113304431313e-06, "loss": 65.5652, "step": 53310 }, { "epoch": 0.44108036563676223, "grad_norm": 1426.9508056640625, "learning_rate": 7.01982321682948e-06, "loss": 106.7264, "step": 53320 }, { "epoch": 0.4411630888861314, "grad_norm": 1245.83154296875, "learning_rate": 7.018532968519584e-06, "loss": 97.5596, "step": 53330 }, { "epoch": 0.4412458121355007, "grad_norm": 667.5186157226562, "learning_rate": 7.0172425596042846e-06, "loss": 96.0253, "step": 53340 }, { "epoch": 0.44132853538486994, "grad_norm": 558.8970947265625, "learning_rate": 7.0159519901862515e-06, "loss": 108.9551, "step": 53350 }, { "epoch": 0.44141125863423913, "grad_norm": 636.3999633789062, "learning_rate": 7.014661260368171e-06, "loss": 84.9733, "step": 53360 }, { "epoch": 0.4414939818836084, "grad_norm": 927.197265625, "learning_rate": 7.01337037025274e-06, "loss": 111.2972, "step": 53370 }, { "epoch": 0.44157670513297764, "grad_norm": 786.5078125, "learning_rate": 7.012079319942668e-06, "loss": 95.5135, "step": 53380 }, { "epoch": 0.44165942838234684, "grad_norm": 807.628173828125, "learning_rate": 7.01078810954068e-06, "loss": 138.3115, "step": 53390 }, { "epoch": 0.4417421516317161, "grad_norm": 657.76123046875, "learning_rate": 7.0094967391495095e-06, "loss": 96.7088, "step": 53400 }, { "epoch": 0.44182487488108535, "grad_norm": 414.78240966796875, "learning_rate": 7.008205208871906e-06, "loss": 77.1027, "step": 53410 }, { "epoch": 0.44190759813045455, "grad_norm": 719.4336547851562, "learning_rate": 7.00691351881063e-06, "loss": 95.9553, "step": 53420 }, { "epoch": 0.4419903213798238, "grad_norm": 1148.5152587890625, "learning_rate": 7.005621669068456e-06, "loss": 109.5746, "step": 53430 }, { "epoch": 0.44207304462919306, "grad_norm": 508.8988342285156, "learning_rate": 7.004329659748172e-06, "loss": 123.2771, "step": 53440 }, { "epoch": 0.44215576787856226, "grad_norm": 777.6555786132812, "learning_rate": 7.003037490952574e-06, "loss": 101.2551, "step": 53450 }, { "epoch": 0.4422384911279315, "grad_norm": 1049.9522705078125, "learning_rate": 7.0017451627844765e-06, "loss": 137.9618, "step": 53460 }, { "epoch": 0.44232121437730076, "grad_norm": 666.4544677734375, "learning_rate": 7.0004526753467004e-06, "loss": 109.7146, "step": 53470 }, { "epoch": 0.44240393762666996, "grad_norm": 1020.0592651367188, "learning_rate": 6.999160028742089e-06, "loss": 113.0266, "step": 53480 }, { "epoch": 0.4424866608760392, "grad_norm": 646.3621215820312, "learning_rate": 6.997867223073487e-06, "loss": 148.3913, "step": 53490 }, { "epoch": 0.44256938412540847, "grad_norm": 1154.8201904296875, "learning_rate": 6.996574258443761e-06, "loss": 111.0904, "step": 53500 }, { "epoch": 0.44265210737477767, "grad_norm": 1100.033935546875, "learning_rate": 6.995281134955784e-06, "loss": 90.6527, "step": 53510 }, { "epoch": 0.4427348306241469, "grad_norm": 1160.05712890625, "learning_rate": 6.993987852712442e-06, "loss": 113.9817, "step": 53520 }, { "epoch": 0.4428175538735162, "grad_norm": 1008.1426391601562, "learning_rate": 6.992694411816638e-06, "loss": 91.588, "step": 53530 }, { "epoch": 0.4429002771228854, "grad_norm": 892.0647583007812, "learning_rate": 6.991400812371287e-06, "loss": 127.6992, "step": 53540 }, { "epoch": 0.44298300037225463, "grad_norm": 700.9825439453125, "learning_rate": 6.990107054479313e-06, "loss": 85.8635, "step": 53550 }, { "epoch": 0.44306572362162383, "grad_norm": 452.05950927734375, "learning_rate": 6.988813138243652e-06, "loss": 109.3417, "step": 53560 }, { "epoch": 0.4431484468709931, "grad_norm": 1181.6788330078125, "learning_rate": 6.987519063767257e-06, "loss": 116.6035, "step": 53570 }, { "epoch": 0.44323117012036234, "grad_norm": 991.6622924804688, "learning_rate": 6.986224831153092e-06, "loss": 78.8246, "step": 53580 }, { "epoch": 0.44331389336973154, "grad_norm": 1134.1353759765625, "learning_rate": 6.984930440504134e-06, "loss": 113.1138, "step": 53590 }, { "epoch": 0.4433966166191008, "grad_norm": 708.4700317382812, "learning_rate": 6.9836358919233695e-06, "loss": 79.0538, "step": 53600 }, { "epoch": 0.44347933986847005, "grad_norm": 917.70166015625, "learning_rate": 6.982341185513799e-06, "loss": 88.8924, "step": 53610 }, { "epoch": 0.44356206311783924, "grad_norm": 966.7335815429688, "learning_rate": 6.981046321378441e-06, "loss": 142.0511, "step": 53620 }, { "epoch": 0.4436447863672085, "grad_norm": 804.2879028320312, "learning_rate": 6.979751299620318e-06, "loss": 75.3757, "step": 53630 }, { "epoch": 0.44372750961657775, "grad_norm": 1585.9019775390625, "learning_rate": 6.978456120342469e-06, "loss": 95.9753, "step": 53640 }, { "epoch": 0.44381023286594695, "grad_norm": 1567.5927734375, "learning_rate": 6.977160783647947e-06, "loss": 126.4141, "step": 53650 }, { "epoch": 0.4438929561153162, "grad_norm": 1687.5382080078125, "learning_rate": 6.975865289639815e-06, "loss": 90.8707, "step": 53660 }, { "epoch": 0.44397567936468546, "grad_norm": 973.2371215820312, "learning_rate": 6.974569638421151e-06, "loss": 82.7147, "step": 53670 }, { "epoch": 0.44405840261405466, "grad_norm": 658.38818359375, "learning_rate": 6.973273830095042e-06, "loss": 85.3202, "step": 53680 }, { "epoch": 0.4441411258634239, "grad_norm": 1075.40673828125, "learning_rate": 6.971977864764591e-06, "loss": 99.2303, "step": 53690 }, { "epoch": 0.44422384911279317, "grad_norm": 910.6012573242188, "learning_rate": 6.970681742532911e-06, "loss": 129.7319, "step": 53700 }, { "epoch": 0.44430657236216237, "grad_norm": 1031.2911376953125, "learning_rate": 6.969385463503129e-06, "loss": 96.3028, "step": 53710 }, { "epoch": 0.4443892956115316, "grad_norm": 679.598876953125, "learning_rate": 6.968089027778384e-06, "loss": 91.5366, "step": 53720 }, { "epoch": 0.4444720188609009, "grad_norm": 842.2387084960938, "learning_rate": 6.9667924354618275e-06, "loss": 107.6039, "step": 53730 }, { "epoch": 0.4445547421102701, "grad_norm": 645.2871704101562, "learning_rate": 6.965495686656623e-06, "loss": 125.044, "step": 53740 }, { "epoch": 0.44463746535963933, "grad_norm": 614.8228149414062, "learning_rate": 6.964198781465948e-06, "loss": 101.3111, "step": 53750 }, { "epoch": 0.4447201886090086, "grad_norm": 942.0760498046875, "learning_rate": 6.962901719992989e-06, "loss": 89.1372, "step": 53760 }, { "epoch": 0.4448029118583778, "grad_norm": 577.6919555664062, "learning_rate": 6.961604502340949e-06, "loss": 59.7649, "step": 53770 }, { "epoch": 0.44488563510774704, "grad_norm": 1402.629638671875, "learning_rate": 6.960307128613042e-06, "loss": 133.4121, "step": 53780 }, { "epoch": 0.4449683583571163, "grad_norm": 1055.0478515625, "learning_rate": 6.959009598912493e-06, "loss": 127.3038, "step": 53790 }, { "epoch": 0.4450510816064855, "grad_norm": 1320.6951904296875, "learning_rate": 6.957711913342541e-06, "loss": 86.9509, "step": 53800 }, { "epoch": 0.44513380485585474, "grad_norm": 1073.6241455078125, "learning_rate": 6.956414072006437e-06, "loss": 122.6924, "step": 53810 }, { "epoch": 0.445216528105224, "grad_norm": 870.7139282226562, "learning_rate": 6.955116075007443e-06, "loss": 124.2368, "step": 53820 }, { "epoch": 0.4452992513545932, "grad_norm": 1268.8851318359375, "learning_rate": 6.953817922448837e-06, "loss": 89.0271, "step": 53830 }, { "epoch": 0.44538197460396245, "grad_norm": 1143.03955078125, "learning_rate": 6.9525196144339055e-06, "loss": 145.819, "step": 53840 }, { "epoch": 0.4454646978533317, "grad_norm": 1068.4166259765625, "learning_rate": 6.951221151065948e-06, "loss": 131.2076, "step": 53850 }, { "epoch": 0.4455474211027009, "grad_norm": 797.7089233398438, "learning_rate": 6.949922532448279e-06, "loss": 98.0425, "step": 53860 }, { "epoch": 0.44563014435207016, "grad_norm": 760.931640625, "learning_rate": 6.948623758684223e-06, "loss": 96.0778, "step": 53870 }, { "epoch": 0.4457128676014394, "grad_norm": 624.017822265625, "learning_rate": 6.9473248298771176e-06, "loss": 89.5199, "step": 53880 }, { "epoch": 0.4457955908508086, "grad_norm": 689.559814453125, "learning_rate": 6.946025746130312e-06, "loss": 123.3743, "step": 53890 }, { "epoch": 0.44587831410017786, "grad_norm": 1045.9923095703125, "learning_rate": 6.944726507547169e-06, "loss": 100.6308, "step": 53900 }, { "epoch": 0.44596103734954706, "grad_norm": 1568.38330078125, "learning_rate": 6.943427114231064e-06, "loss": 136.4211, "step": 53910 }, { "epoch": 0.4460437605989163, "grad_norm": 1263.7076416015625, "learning_rate": 6.942127566285382e-06, "loss": 89.5075, "step": 53920 }, { "epoch": 0.44612648384828557, "grad_norm": 1032.7841796875, "learning_rate": 6.940827863813523e-06, "loss": 124.588, "step": 53930 }, { "epoch": 0.44620920709765477, "grad_norm": 1023.0800170898438, "learning_rate": 6.9395280069188964e-06, "loss": 127.864, "step": 53940 }, { "epoch": 0.446291930347024, "grad_norm": 684.29931640625, "learning_rate": 6.9382279957049295e-06, "loss": 106.943, "step": 53950 }, { "epoch": 0.4463746535963933, "grad_norm": 590.7765502929688, "learning_rate": 6.936927830275055e-06, "loss": 80.368, "step": 53960 }, { "epoch": 0.4464573768457625, "grad_norm": 953.7996826171875, "learning_rate": 6.935627510732724e-06, "loss": 114.7125, "step": 53970 }, { "epoch": 0.44654010009513173, "grad_norm": 665.14697265625, "learning_rate": 6.934327037181394e-06, "loss": 107.5566, "step": 53980 }, { "epoch": 0.446622823344501, "grad_norm": 933.9505615234375, "learning_rate": 6.933026409724538e-06, "loss": 119.2751, "step": 53990 }, { "epoch": 0.4467055465938702, "grad_norm": 1047.66796875, "learning_rate": 6.931725628465643e-06, "loss": 84.2427, "step": 54000 }, { "epoch": 0.44678826984323944, "grad_norm": 1026.7486572265625, "learning_rate": 6.9304246935082065e-06, "loss": 80.6261, "step": 54010 }, { "epoch": 0.4468709930926087, "grad_norm": 948.4655151367188, "learning_rate": 6.929123604955735e-06, "loss": 128.3851, "step": 54020 }, { "epoch": 0.4469537163419779, "grad_norm": 1473.8193359375, "learning_rate": 6.927822362911753e-06, "loss": 83.6743, "step": 54030 }, { "epoch": 0.44703643959134715, "grad_norm": 677.3907470703125, "learning_rate": 6.926520967479791e-06, "loss": 96.5376, "step": 54040 }, { "epoch": 0.4471191628407164, "grad_norm": 759.7684326171875, "learning_rate": 6.9252194187634e-06, "loss": 85.2003, "step": 54050 }, { "epoch": 0.4472018860900856, "grad_norm": 1008.9971923828125, "learning_rate": 6.923917716866133e-06, "loss": 108.9541, "step": 54060 }, { "epoch": 0.44728460933945485, "grad_norm": 1046.4508056640625, "learning_rate": 6.922615861891564e-06, "loss": 73.9177, "step": 54070 }, { "epoch": 0.4473673325888241, "grad_norm": 1061.5517578125, "learning_rate": 6.921313853943275e-06, "loss": 116.9172, "step": 54080 }, { "epoch": 0.4474500558381933, "grad_norm": 1054.0621337890625, "learning_rate": 6.9200116931248575e-06, "loss": 94.5179, "step": 54090 }, { "epoch": 0.44753277908756256, "grad_norm": 1365.836181640625, "learning_rate": 6.918709379539924e-06, "loss": 91.8605, "step": 54100 }, { "epoch": 0.4476155023369318, "grad_norm": 914.6397094726562, "learning_rate": 6.917406913292089e-06, "loss": 95.1237, "step": 54110 }, { "epoch": 0.447698225586301, "grad_norm": 1938.41064453125, "learning_rate": 6.916104294484988e-06, "loss": 133.195, "step": 54120 }, { "epoch": 0.44778094883567027, "grad_norm": 728.0489501953125, "learning_rate": 6.91480152322226e-06, "loss": 105.5754, "step": 54130 }, { "epoch": 0.4478636720850395, "grad_norm": 1188.586181640625, "learning_rate": 6.913498599607563e-06, "loss": 110.4302, "step": 54140 }, { "epoch": 0.4479463953344087, "grad_norm": 1016.9649047851562, "learning_rate": 6.9121955237445644e-06, "loss": 75.1243, "step": 54150 }, { "epoch": 0.448029118583778, "grad_norm": 1644.49755859375, "learning_rate": 6.910892295736944e-06, "loss": 87.0271, "step": 54160 }, { "epoch": 0.44811184183314723, "grad_norm": 885.784423828125, "learning_rate": 6.9095889156883934e-06, "loss": 104.9015, "step": 54170 }, { "epoch": 0.4481945650825164, "grad_norm": 575.415283203125, "learning_rate": 6.908285383702617e-06, "loss": 90.7336, "step": 54180 }, { "epoch": 0.4482772883318857, "grad_norm": 876.1041870117188, "learning_rate": 6.906981699883329e-06, "loss": 93.0574, "step": 54190 }, { "epoch": 0.44836001158125494, "grad_norm": 665.2525024414062, "learning_rate": 6.90567786433426e-06, "loss": 98.4839, "step": 54200 }, { "epoch": 0.44844273483062413, "grad_norm": 601.7535400390625, "learning_rate": 6.904373877159149e-06, "loss": 88.9101, "step": 54210 }, { "epoch": 0.4485254580799934, "grad_norm": 761.7291870117188, "learning_rate": 6.903069738461749e-06, "loss": 90.6817, "step": 54220 }, { "epoch": 0.44860818132936264, "grad_norm": 510.3106994628906, "learning_rate": 6.901765448345823e-06, "loss": 172.6727, "step": 54230 }, { "epoch": 0.44869090457873184, "grad_norm": 836.3589477539062, "learning_rate": 6.900461006915149e-06, "loss": 107.1047, "step": 54240 }, { "epoch": 0.4487736278281011, "grad_norm": 981.2500610351562, "learning_rate": 6.899156414273514e-06, "loss": 102.4325, "step": 54250 }, { "epoch": 0.44885635107747035, "grad_norm": 996.1396484375, "learning_rate": 6.89785167052472e-06, "loss": 76.1564, "step": 54260 }, { "epoch": 0.44893907432683955, "grad_norm": 1051.859619140625, "learning_rate": 6.896546775772577e-06, "loss": 89.3364, "step": 54270 }, { "epoch": 0.4490217975762088, "grad_norm": 743.7044677734375, "learning_rate": 6.8952417301209114e-06, "loss": 92.615, "step": 54280 }, { "epoch": 0.449104520825578, "grad_norm": 1194.3951416015625, "learning_rate": 6.893936533673561e-06, "loss": 87.0885, "step": 54290 }, { "epoch": 0.44918724407494726, "grad_norm": 855.311767578125, "learning_rate": 6.892631186534371e-06, "loss": 94.2941, "step": 54300 }, { "epoch": 0.4492699673243165, "grad_norm": 804.5349731445312, "learning_rate": 6.891325688807204e-06, "loss": 119.2308, "step": 54310 }, { "epoch": 0.4493526905736857, "grad_norm": 1499.4466552734375, "learning_rate": 6.890020040595932e-06, "loss": 117.2243, "step": 54320 }, { "epoch": 0.44943541382305496, "grad_norm": 907.1102905273438, "learning_rate": 6.88871424200444e-06, "loss": 120.5858, "step": 54330 }, { "epoch": 0.4495181370724242, "grad_norm": 1519.6060791015625, "learning_rate": 6.887408293136621e-06, "loss": 98.4492, "step": 54340 }, { "epoch": 0.4496008603217934, "grad_norm": 802.8118896484375, "learning_rate": 6.886102194096389e-06, "loss": 67.4142, "step": 54350 }, { "epoch": 0.44968358357116267, "grad_norm": 638.83935546875, "learning_rate": 6.884795944987661e-06, "loss": 89.7945, "step": 54360 }, { "epoch": 0.4497663068205319, "grad_norm": 857.33984375, "learning_rate": 6.8834895459143694e-06, "loss": 103.017, "step": 54370 }, { "epoch": 0.4498490300699011, "grad_norm": 1263.925537109375, "learning_rate": 6.882182996980457e-06, "loss": 80.3623, "step": 54380 }, { "epoch": 0.4499317533192704, "grad_norm": 617.5503540039062, "learning_rate": 6.880876298289885e-06, "loss": 90.8478, "step": 54390 }, { "epoch": 0.45001447656863963, "grad_norm": 978.7340087890625, "learning_rate": 6.879569449946617e-06, "loss": 85.7712, "step": 54400 }, { "epoch": 0.45009719981800883, "grad_norm": 1923.4696044921875, "learning_rate": 6.878262452054632e-06, "loss": 119.6836, "step": 54410 }, { "epoch": 0.4501799230673781, "grad_norm": 713.7830200195312, "learning_rate": 6.876955304717925e-06, "loss": 99.3105, "step": 54420 }, { "epoch": 0.45026264631674734, "grad_norm": 714.7610473632812, "learning_rate": 6.875648008040499e-06, "loss": 67.4421, "step": 54430 }, { "epoch": 0.45034536956611654, "grad_norm": 1194.353271484375, "learning_rate": 6.874340562126368e-06, "loss": 97.9703, "step": 54440 }, { "epoch": 0.4504280928154858, "grad_norm": 747.249755859375, "learning_rate": 6.873032967079562e-06, "loss": 113.6194, "step": 54450 }, { "epoch": 0.45051081606485505, "grad_norm": 1645.705322265625, "learning_rate": 6.871725223004118e-06, "loss": 134.7527, "step": 54460 }, { "epoch": 0.45059353931422425, "grad_norm": 704.1566162109375, "learning_rate": 6.870417330004086e-06, "loss": 97.6264, "step": 54470 }, { "epoch": 0.4506762625635935, "grad_norm": 945.1201782226562, "learning_rate": 6.869109288183534e-06, "loss": 82.3648, "step": 54480 }, { "epoch": 0.45075898581296275, "grad_norm": 752.2423706054688, "learning_rate": 6.867801097646534e-06, "loss": 93.4124, "step": 54490 }, { "epoch": 0.45084170906233195, "grad_norm": 1171.9190673828125, "learning_rate": 6.866492758497171e-06, "loss": 71.8976, "step": 54500 }, { "epoch": 0.4509244323117012, "grad_norm": 1028.865478515625, "learning_rate": 6.865184270839546e-06, "loss": 79.6011, "step": 54510 }, { "epoch": 0.45100715556107046, "grad_norm": 1303.591552734375, "learning_rate": 6.863875634777767e-06, "loss": 111.4603, "step": 54520 }, { "epoch": 0.45108987881043966, "grad_norm": 702.6375122070312, "learning_rate": 6.86256685041596e-06, "loss": 76.6232, "step": 54530 }, { "epoch": 0.4511726020598089, "grad_norm": 1701.81005859375, "learning_rate": 6.861257917858257e-06, "loss": 110.8894, "step": 54540 }, { "epoch": 0.45125532530917817, "grad_norm": 640.2454833984375, "learning_rate": 6.859948837208802e-06, "loss": 83.109, "step": 54550 }, { "epoch": 0.45133804855854737, "grad_norm": 681.0009155273438, "learning_rate": 6.8586396085717536e-06, "loss": 105.6306, "step": 54560 }, { "epoch": 0.4514207718079166, "grad_norm": 2675.404052734375, "learning_rate": 6.8573302320512836e-06, "loss": 132.9688, "step": 54570 }, { "epoch": 0.4515034950572859, "grad_norm": 891.1157836914062, "learning_rate": 6.85602070775157e-06, "loss": 103.1662, "step": 54580 }, { "epoch": 0.4515862183066551, "grad_norm": 1364.07666015625, "learning_rate": 6.854711035776806e-06, "loss": 99.1324, "step": 54590 }, { "epoch": 0.45166894155602433, "grad_norm": 524.9562377929688, "learning_rate": 6.853401216231198e-06, "loss": 106.766, "step": 54600 }, { "epoch": 0.4517516648053936, "grad_norm": 818.6365966796875, "learning_rate": 6.8520912492189605e-06, "loss": 80.276, "step": 54610 }, { "epoch": 0.4518343880547628, "grad_norm": 1519.1331787109375, "learning_rate": 6.850781134844323e-06, "loss": 69.9319, "step": 54620 }, { "epoch": 0.45191711130413204, "grad_norm": 697.9396362304688, "learning_rate": 6.8494708732115235e-06, "loss": 123.4269, "step": 54630 }, { "epoch": 0.45199983455350123, "grad_norm": 990.307373046875, "learning_rate": 6.8481604644248155e-06, "loss": 89.6535, "step": 54640 }, { "epoch": 0.4520825578028705, "grad_norm": 651.0850830078125, "learning_rate": 6.846849908588461e-06, "loss": 80.7496, "step": 54650 }, { "epoch": 0.45216528105223974, "grad_norm": 1296.8258056640625, "learning_rate": 6.845539205806735e-06, "loss": 129.8521, "step": 54660 }, { "epoch": 0.45224800430160894, "grad_norm": 867.2517700195312, "learning_rate": 6.844228356183924e-06, "loss": 76.4351, "step": 54670 }, { "epoch": 0.4523307275509782, "grad_norm": 878.8154907226562, "learning_rate": 6.842917359824326e-06, "loss": 103.5479, "step": 54680 }, { "epoch": 0.45241345080034745, "grad_norm": 589.3108520507812, "learning_rate": 6.841606216832253e-06, "loss": 158.1967, "step": 54690 }, { "epoch": 0.45249617404971665, "grad_norm": 440.4094543457031, "learning_rate": 6.840294927312024e-06, "loss": 84.1425, "step": 54700 }, { "epoch": 0.4525788972990859, "grad_norm": 773.7758178710938, "learning_rate": 6.838983491367974e-06, "loss": 81.9263, "step": 54710 }, { "epoch": 0.45266162054845516, "grad_norm": 1917.0274658203125, "learning_rate": 6.837671909104447e-06, "loss": 108.1925, "step": 54720 }, { "epoch": 0.45274434379782436, "grad_norm": 528.8961791992188, "learning_rate": 6.836360180625801e-06, "loss": 123.8608, "step": 54730 }, { "epoch": 0.4528270670471936, "grad_norm": 719.6814575195312, "learning_rate": 6.835048306036404e-06, "loss": 108.1355, "step": 54740 }, { "epoch": 0.45290979029656286, "grad_norm": 1164.0782470703125, "learning_rate": 6.833736285440632e-06, "loss": 83.8529, "step": 54750 }, { "epoch": 0.45299251354593206, "grad_norm": 540.9407958984375, "learning_rate": 6.832424118942881e-06, "loss": 125.107, "step": 54760 }, { "epoch": 0.4530752367953013, "grad_norm": 736.422119140625, "learning_rate": 6.831111806647552e-06, "loss": 106.4315, "step": 54770 }, { "epoch": 0.45315796004467057, "grad_norm": 697.813232421875, "learning_rate": 6.829799348659061e-06, "loss": 105.6159, "step": 54780 }, { "epoch": 0.45324068329403977, "grad_norm": 925.9052734375, "learning_rate": 6.828486745081835e-06, "loss": 116.2571, "step": 54790 }, { "epoch": 0.453323406543409, "grad_norm": 2547.846923828125, "learning_rate": 6.8271739960203065e-06, "loss": 142.8061, "step": 54800 }, { "epoch": 0.4534061297927783, "grad_norm": 1475.7213134765625, "learning_rate": 6.825861101578931e-06, "loss": 81.7697, "step": 54810 }, { "epoch": 0.4534888530421475, "grad_norm": 1138.5965576171875, "learning_rate": 6.824548061862166e-06, "loss": 92.7645, "step": 54820 }, { "epoch": 0.45357157629151673, "grad_norm": 851.4307250976562, "learning_rate": 6.823234876974489e-06, "loss": 121.6354, "step": 54830 }, { "epoch": 0.453654299540886, "grad_norm": 1226.28076171875, "learning_rate": 6.8219215470203756e-06, "loss": 102.6578, "step": 54840 }, { "epoch": 0.4537370227902552, "grad_norm": 578.7922973632812, "learning_rate": 6.820608072104329e-06, "loss": 102.5517, "step": 54850 }, { "epoch": 0.45381974603962444, "grad_norm": 673.4541625976562, "learning_rate": 6.819294452330853e-06, "loss": 69.6824, "step": 54860 }, { "epoch": 0.4539024692889937, "grad_norm": 2711.473876953125, "learning_rate": 6.817980687804467e-06, "loss": 101.1284, "step": 54870 }, { "epoch": 0.4539851925383629, "grad_norm": 736.4070434570312, "learning_rate": 6.8166667786297e-06, "loss": 96.8542, "step": 54880 }, { "epoch": 0.45406791578773215, "grad_norm": 760.29150390625, "learning_rate": 6.815352724911095e-06, "loss": 105.462, "step": 54890 }, { "epoch": 0.4541506390371014, "grad_norm": 692.9434204101562, "learning_rate": 6.814038526753205e-06, "loss": 91.7443, "step": 54900 }, { "epoch": 0.4542333622864706, "grad_norm": 549.2918090820312, "learning_rate": 6.812724184260596e-06, "loss": 73.3805, "step": 54910 }, { "epoch": 0.45431608553583985, "grad_norm": 702.6921997070312, "learning_rate": 6.811409697537843e-06, "loss": 84.0114, "step": 54920 }, { "epoch": 0.4543988087852091, "grad_norm": 787.5999755859375, "learning_rate": 6.810095066689533e-06, "loss": 101.185, "step": 54930 }, { "epoch": 0.4544815320345783, "grad_norm": 1066.020263671875, "learning_rate": 6.808780291820264e-06, "loss": 99.9101, "step": 54940 }, { "epoch": 0.45456425528394756, "grad_norm": 1216.9044189453125, "learning_rate": 6.80746537303465e-06, "loss": 120.2617, "step": 54950 }, { "epoch": 0.4546469785333168, "grad_norm": 1686.6910400390625, "learning_rate": 6.806150310437312e-06, "loss": 121.6102, "step": 54960 }, { "epoch": 0.454729701782686, "grad_norm": 1484.89111328125, "learning_rate": 6.804835104132883e-06, "loss": 104.0064, "step": 54970 }, { "epoch": 0.45481242503205527, "grad_norm": 979.0828857421875, "learning_rate": 6.803519754226007e-06, "loss": 94.6825, "step": 54980 }, { "epoch": 0.4548951482814245, "grad_norm": 864.7905883789062, "learning_rate": 6.80220426082134e-06, "loss": 91.2416, "step": 54990 }, { "epoch": 0.4549778715307937, "grad_norm": 888.6685791015625, "learning_rate": 6.800888624023552e-06, "loss": 105.9705, "step": 55000 }, { "epoch": 0.455060594780163, "grad_norm": 497.9424133300781, "learning_rate": 6.799572843937322e-06, "loss": 108.1165, "step": 55010 }, { "epoch": 0.4551433180295322, "grad_norm": 518.8756713867188, "learning_rate": 6.79825692066734e-06, "loss": 106.614, "step": 55020 }, { "epoch": 0.4552260412789014, "grad_norm": 1119.955078125, "learning_rate": 6.796940854318306e-06, "loss": 140.4494, "step": 55030 }, { "epoch": 0.4553087645282707, "grad_norm": 594.2408447265625, "learning_rate": 6.795624644994936e-06, "loss": 77.2674, "step": 55040 }, { "epoch": 0.4553914877776399, "grad_norm": 797.1567993164062, "learning_rate": 6.794308292801954e-06, "loss": 111.2638, "step": 55050 }, { "epoch": 0.45547421102700913, "grad_norm": 821.6486206054688, "learning_rate": 6.792991797844095e-06, "loss": 136.5393, "step": 55060 }, { "epoch": 0.4555569342763784, "grad_norm": 597.4654541015625, "learning_rate": 6.791675160226109e-06, "loss": 96.4142, "step": 55070 }, { "epoch": 0.4556396575257476, "grad_norm": 592.2135009765625, "learning_rate": 6.790358380052752e-06, "loss": 93.9998, "step": 55080 }, { "epoch": 0.45572238077511684, "grad_norm": 1021.7890625, "learning_rate": 6.789041457428796e-06, "loss": 124.1362, "step": 55090 }, { "epoch": 0.4558051040244861, "grad_norm": 486.76080322265625, "learning_rate": 6.7877243924590205e-06, "loss": 77.4204, "step": 55100 }, { "epoch": 0.4558878272738553, "grad_norm": 875.5304565429688, "learning_rate": 6.7864071852482205e-06, "loss": 104.1027, "step": 55110 }, { "epoch": 0.45597055052322455, "grad_norm": 738.2188110351562, "learning_rate": 6.7850898359012e-06, "loss": 106.4531, "step": 55120 }, { "epoch": 0.4560532737725938, "grad_norm": 1422.643798828125, "learning_rate": 6.7837723445227724e-06, "loss": 101.3412, "step": 55130 }, { "epoch": 0.456135997021963, "grad_norm": 1019.88818359375, "learning_rate": 6.782454711217767e-06, "loss": 104.1804, "step": 55140 }, { "epoch": 0.45621872027133226, "grad_norm": 1043.4281005859375, "learning_rate": 6.78113693609102e-06, "loss": 159.6309, "step": 55150 }, { "epoch": 0.4563014435207015, "grad_norm": 733.2887573242188, "learning_rate": 6.77981901924738e-06, "loss": 76.8028, "step": 55160 }, { "epoch": 0.4563841667700707, "grad_norm": 650.5491943359375, "learning_rate": 6.7785009607917095e-06, "loss": 132.4901, "step": 55170 }, { "epoch": 0.45646689001943996, "grad_norm": 993.3848876953125, "learning_rate": 6.777182760828881e-06, "loss": 127.0856, "step": 55180 }, { "epoch": 0.4565496132688092, "grad_norm": 941.2340087890625, "learning_rate": 6.7758644194637755e-06, "loss": 68.8753, "step": 55190 }, { "epoch": 0.4566323365181784, "grad_norm": 1339.5758056640625, "learning_rate": 6.774545936801289e-06, "loss": 93.002, "step": 55200 }, { "epoch": 0.45671505976754767, "grad_norm": 605.7567749023438, "learning_rate": 6.773227312946327e-06, "loss": 123.4982, "step": 55210 }, { "epoch": 0.4567977830169169, "grad_norm": 984.2488403320312, "learning_rate": 6.771908548003803e-06, "loss": 77.7853, "step": 55220 }, { "epoch": 0.4568805062662861, "grad_norm": 829.1386108398438, "learning_rate": 6.77058964207865e-06, "loss": 213.839, "step": 55230 }, { "epoch": 0.4569632295156554, "grad_norm": 764.8698120117188, "learning_rate": 6.769270595275804e-06, "loss": 102.3559, "step": 55240 }, { "epoch": 0.45704595276502463, "grad_norm": 798.2879028320312, "learning_rate": 6.767951407700217e-06, "loss": 90.5174, "step": 55250 }, { "epoch": 0.45712867601439383, "grad_norm": 856.211181640625, "learning_rate": 6.766632079456852e-06, "loss": 85.0527, "step": 55260 }, { "epoch": 0.4572113992637631, "grad_norm": 831.584716796875, "learning_rate": 6.765312610650677e-06, "loss": 95.3017, "step": 55270 }, { "epoch": 0.45729412251313234, "grad_norm": 883.2146606445312, "learning_rate": 6.763993001386681e-06, "loss": 113.933, "step": 55280 }, { "epoch": 0.45737684576250154, "grad_norm": 954.8955688476562, "learning_rate": 6.762673251769858e-06, "loss": 121.8417, "step": 55290 }, { "epoch": 0.4574595690118708, "grad_norm": 1161.1273193359375, "learning_rate": 6.761353361905214e-06, "loss": 90.7742, "step": 55300 }, { "epoch": 0.45754229226124005, "grad_norm": 683.6365966796875, "learning_rate": 6.7600333318977655e-06, "loss": 91.0024, "step": 55310 }, { "epoch": 0.45762501551060925, "grad_norm": 1601.54638671875, "learning_rate": 6.758713161852541e-06, "loss": 110.4416, "step": 55320 }, { "epoch": 0.4577077387599785, "grad_norm": 934.130126953125, "learning_rate": 6.757392851874584e-06, "loss": 113.9463, "step": 55330 }, { "epoch": 0.45779046200934775, "grad_norm": 1528.1434326171875, "learning_rate": 6.756072402068943e-06, "loss": 127.9479, "step": 55340 }, { "epoch": 0.45787318525871695, "grad_norm": 771.1652221679688, "learning_rate": 6.75475181254068e-06, "loss": 89.5568, "step": 55350 }, { "epoch": 0.4579559085080862, "grad_norm": 879.8575439453125, "learning_rate": 6.753431083394868e-06, "loss": 99.1166, "step": 55360 }, { "epoch": 0.4580386317574554, "grad_norm": 684.24609375, "learning_rate": 6.75211021473659e-06, "loss": 86.4435, "step": 55370 }, { "epoch": 0.45812135500682466, "grad_norm": 1416.9102783203125, "learning_rate": 6.750789206670945e-06, "loss": 95.3575, "step": 55380 }, { "epoch": 0.4582040782561939, "grad_norm": 889.2215576171875, "learning_rate": 6.749468059303039e-06, "loss": 92.2547, "step": 55390 }, { "epoch": 0.4582868015055631, "grad_norm": 1061.4666748046875, "learning_rate": 6.748146772737988e-06, "loss": 101.2263, "step": 55400 }, { "epoch": 0.45836952475493237, "grad_norm": 1019.367919921875, "learning_rate": 6.7468253470809205e-06, "loss": 136.364, "step": 55410 }, { "epoch": 0.4584522480043016, "grad_norm": 1758.954833984375, "learning_rate": 6.745503782436976e-06, "loss": 124.5927, "step": 55420 }, { "epoch": 0.4585349712536708, "grad_norm": 763.4608154296875, "learning_rate": 6.7441820789113085e-06, "loss": 105.7726, "step": 55430 }, { "epoch": 0.4586176945030401, "grad_norm": 946.8983764648438, "learning_rate": 6.7428602366090764e-06, "loss": 112.0909, "step": 55440 }, { "epoch": 0.45870041775240933, "grad_norm": 932.01318359375, "learning_rate": 6.741538255635454e-06, "loss": 88.3237, "step": 55450 }, { "epoch": 0.4587831410017785, "grad_norm": 829.2271118164062, "learning_rate": 6.740216136095626e-06, "loss": 77.0077, "step": 55460 }, { "epoch": 0.4588658642511478, "grad_norm": 386.30364990234375, "learning_rate": 6.738893878094786e-06, "loss": 87.8386, "step": 55470 }, { "epoch": 0.45894858750051704, "grad_norm": 1005.0819702148438, "learning_rate": 6.737571481738141e-06, "loss": 128.3534, "step": 55480 }, { "epoch": 0.45903131074988623, "grad_norm": 921.326904296875, "learning_rate": 6.736248947130907e-06, "loss": 107.7665, "step": 55490 }, { "epoch": 0.4591140339992555, "grad_norm": 1480.9969482421875, "learning_rate": 6.734926274378313e-06, "loss": 163.0793, "step": 55500 }, { "epoch": 0.45919675724862474, "grad_norm": 955.8799438476562, "learning_rate": 6.733603463585598e-06, "loss": 93.535, "step": 55510 }, { "epoch": 0.45927948049799394, "grad_norm": 1354.3538818359375, "learning_rate": 6.73228051485801e-06, "loss": 85.5063, "step": 55520 }, { "epoch": 0.4593622037473632, "grad_norm": 536.1598510742188, "learning_rate": 6.7309574283008125e-06, "loss": 84.9367, "step": 55530 }, { "epoch": 0.45944492699673245, "grad_norm": 663.9678344726562, "learning_rate": 6.729634204019277e-06, "loss": 104.2453, "step": 55540 }, { "epoch": 0.45952765024610165, "grad_norm": 690.8705444335938, "learning_rate": 6.7283108421186835e-06, "loss": 108.3504, "step": 55550 }, { "epoch": 0.4596103734954709, "grad_norm": 1198.9808349609375, "learning_rate": 6.726987342704331e-06, "loss": 83.5574, "step": 55560 }, { "epoch": 0.45969309674484016, "grad_norm": 780.8455810546875, "learning_rate": 6.72566370588152e-06, "loss": 92.0601, "step": 55570 }, { "epoch": 0.45977581999420936, "grad_norm": 1550.3758544921875, "learning_rate": 6.724339931755568e-06, "loss": 114.7621, "step": 55580 }, { "epoch": 0.4598585432435786, "grad_norm": 515.3203735351562, "learning_rate": 6.7230160204318e-06, "loss": 85.6729, "step": 55590 }, { "epoch": 0.45994126649294786, "grad_norm": 622.2483520507812, "learning_rate": 6.721691972015557e-06, "loss": 91.3313, "step": 55600 }, { "epoch": 0.46002398974231706, "grad_norm": 1014.8973999023438, "learning_rate": 6.720367786612185e-06, "loss": 71.0175, "step": 55610 }, { "epoch": 0.4601067129916863, "grad_norm": 2489.36962890625, "learning_rate": 6.719043464327043e-06, "loss": 82.0414, "step": 55620 }, { "epoch": 0.46018943624105557, "grad_norm": 736.4323120117188, "learning_rate": 6.717719005265502e-06, "loss": 68.9147, "step": 55630 }, { "epoch": 0.46027215949042477, "grad_norm": 749.0441284179688, "learning_rate": 6.716394409532944e-06, "loss": 117.8003, "step": 55640 }, { "epoch": 0.460354882739794, "grad_norm": 869.51123046875, "learning_rate": 6.715069677234758e-06, "loss": 116.4776, "step": 55650 }, { "epoch": 0.4604376059891633, "grad_norm": 686.819091796875, "learning_rate": 6.713744808476349e-06, "loss": 101.6366, "step": 55660 }, { "epoch": 0.4605203292385325, "grad_norm": 1027.5953369140625, "learning_rate": 6.712419803363132e-06, "loss": 92.2464, "step": 55670 }, { "epoch": 0.46060305248790173, "grad_norm": 743.2421875, "learning_rate": 6.711094662000529e-06, "loss": 104.2116, "step": 55680 }, { "epoch": 0.460685775737271, "grad_norm": 992.1659545898438, "learning_rate": 6.709769384493978e-06, "loss": 102.0216, "step": 55690 }, { "epoch": 0.4607684989866402, "grad_norm": 763.0567626953125, "learning_rate": 6.708443970948923e-06, "loss": 105.3009, "step": 55700 }, { "epoch": 0.46085122223600944, "grad_norm": 1082.239501953125, "learning_rate": 6.707118421470822e-06, "loss": 82.143, "step": 55710 }, { "epoch": 0.4609339454853787, "grad_norm": 943.666259765625, "learning_rate": 6.705792736165142e-06, "loss": 117.292, "step": 55720 }, { "epoch": 0.4610166687347479, "grad_norm": 761.20166015625, "learning_rate": 6.7044669151373645e-06, "loss": 79.007, "step": 55730 }, { "epoch": 0.46109939198411715, "grad_norm": 676.6871337890625, "learning_rate": 6.7031409584929765e-06, "loss": 96.3533, "step": 55740 }, { "epoch": 0.46118211523348634, "grad_norm": 851.9503173828125, "learning_rate": 6.701814866337477e-06, "loss": 131.2042, "step": 55750 }, { "epoch": 0.4612648384828556, "grad_norm": 1339.9852294921875, "learning_rate": 6.700488638776379e-06, "loss": 113.9575, "step": 55760 }, { "epoch": 0.46134756173222485, "grad_norm": 945.3772583007812, "learning_rate": 6.699162275915208e-06, "loss": 88.1573, "step": 55770 }, { "epoch": 0.46143028498159405, "grad_norm": 1223.3470458984375, "learning_rate": 6.6978357778594896e-06, "loss": 81.1195, "step": 55780 }, { "epoch": 0.4615130082309633, "grad_norm": 922.9274291992188, "learning_rate": 6.69650914471477e-06, "loss": 97.8219, "step": 55790 }, { "epoch": 0.46159573148033256, "grad_norm": 1972.91162109375, "learning_rate": 6.695182376586603e-06, "loss": 99.1887, "step": 55800 }, { "epoch": 0.46167845472970176, "grad_norm": 1156.863525390625, "learning_rate": 6.6938554735805565e-06, "loss": 99.5834, "step": 55810 }, { "epoch": 0.461761177979071, "grad_norm": 1650.710205078125, "learning_rate": 6.6925284358022035e-06, "loss": 155.1294, "step": 55820 }, { "epoch": 0.46184390122844027, "grad_norm": 520.858642578125, "learning_rate": 6.69120126335713e-06, "loss": 122.3727, "step": 55830 }, { "epoch": 0.46192662447780947, "grad_norm": 916.6767578125, "learning_rate": 6.689873956350932e-06, "loss": 75.5874, "step": 55840 }, { "epoch": 0.4620093477271787, "grad_norm": 917.3632202148438, "learning_rate": 6.688546514889221e-06, "loss": 82.2032, "step": 55850 }, { "epoch": 0.462092070976548, "grad_norm": 1109.2620849609375, "learning_rate": 6.687218939077613e-06, "loss": 93.1306, "step": 55860 }, { "epoch": 0.4621747942259172, "grad_norm": 933.2094116210938, "learning_rate": 6.685891229021736e-06, "loss": 82.2505, "step": 55870 }, { "epoch": 0.46225751747528643, "grad_norm": 930.94873046875, "learning_rate": 6.6845633848272315e-06, "loss": 113.1939, "step": 55880 }, { "epoch": 0.4623402407246557, "grad_norm": 808.5699462890625, "learning_rate": 6.68323540659975e-06, "loss": 100.777, "step": 55890 }, { "epoch": 0.4624229639740249, "grad_norm": 663.3551025390625, "learning_rate": 6.681907294444952e-06, "loss": 67.9267, "step": 55900 }, { "epoch": 0.46250568722339414, "grad_norm": 1566.524169921875, "learning_rate": 6.6805790484685094e-06, "loss": 93.2308, "step": 55910 }, { "epoch": 0.4625884104727634, "grad_norm": 1127.6248779296875, "learning_rate": 6.679250668776105e-06, "loss": 140.5565, "step": 55920 }, { "epoch": 0.4626711337221326, "grad_norm": 1181.0595703125, "learning_rate": 6.677922155473432e-06, "loss": 99.0083, "step": 55930 }, { "epoch": 0.46275385697150184, "grad_norm": 1022.6056518554688, "learning_rate": 6.676593508666192e-06, "loss": 135.9459, "step": 55940 }, { "epoch": 0.4628365802208711, "grad_norm": 1002.8577880859375, "learning_rate": 6.675264728460103e-06, "loss": 85.4971, "step": 55950 }, { "epoch": 0.4629193034702403, "grad_norm": 944.582763671875, "learning_rate": 6.673935814960887e-06, "loss": 107.2265, "step": 55960 }, { "epoch": 0.46300202671960955, "grad_norm": 1288.594970703125, "learning_rate": 6.672606768274281e-06, "loss": 90.4464, "step": 55970 }, { "epoch": 0.4630847499689788, "grad_norm": 14770.8916015625, "learning_rate": 6.67127758850603e-06, "loss": 241.7437, "step": 55980 }, { "epoch": 0.463167473218348, "grad_norm": 957.4649047851562, "learning_rate": 6.669948275761893e-06, "loss": 94.3999, "step": 55990 }, { "epoch": 0.46325019646771726, "grad_norm": 1285.9119873046875, "learning_rate": 6.668618830147634e-06, "loss": 125.4894, "step": 56000 }, { "epoch": 0.4633329197170865, "grad_norm": 722.8943481445312, "learning_rate": 6.667289251769033e-06, "loss": 85.9025, "step": 56010 }, { "epoch": 0.4634156429664557, "grad_norm": 1016.5885009765625, "learning_rate": 6.6659595407318775e-06, "loss": 107.344, "step": 56020 }, { "epoch": 0.46349836621582496, "grad_norm": 900.4605712890625, "learning_rate": 6.664629697141969e-06, "loss": 111.3321, "step": 56030 }, { "epoch": 0.4635810894651942, "grad_norm": 971.4387817382812, "learning_rate": 6.663299721105113e-06, "loss": 106.712, "step": 56040 }, { "epoch": 0.4636638127145634, "grad_norm": 1102.5128173828125, "learning_rate": 6.661969612727133e-06, "loss": 94.0693, "step": 56050 }, { "epoch": 0.46374653596393267, "grad_norm": 1239.50341796875, "learning_rate": 6.660639372113858e-06, "loss": 109.2637, "step": 56060 }, { "epoch": 0.4638292592133019, "grad_norm": 986.812255859375, "learning_rate": 6.65930899937113e-06, "loss": 104.8453, "step": 56070 }, { "epoch": 0.4639119824626711, "grad_norm": 1008.2682495117188, "learning_rate": 6.657978494604799e-06, "loss": 109.9477, "step": 56080 }, { "epoch": 0.4639947057120404, "grad_norm": 683.7396850585938, "learning_rate": 6.656647857920728e-06, "loss": 109.3742, "step": 56090 }, { "epoch": 0.4640774289614096, "grad_norm": 1494.6700439453125, "learning_rate": 6.655317089424791e-06, "loss": 106.0912, "step": 56100 }, { "epoch": 0.46416015221077883, "grad_norm": 1352.005615234375, "learning_rate": 6.6539861892228695e-06, "loss": 96.1662, "step": 56110 }, { "epoch": 0.4642428754601481, "grad_norm": 787.8734741210938, "learning_rate": 6.652655157420859e-06, "loss": 81.0995, "step": 56120 }, { "epoch": 0.4643255987095173, "grad_norm": 837.31787109375, "learning_rate": 6.651323994124661e-06, "loss": 93.8052, "step": 56130 }, { "epoch": 0.46440832195888654, "grad_norm": 1119.1298828125, "learning_rate": 6.649992699440191e-06, "loss": 86.7144, "step": 56140 }, { "epoch": 0.4644910452082558, "grad_norm": 1033.329345703125, "learning_rate": 6.648661273473375e-06, "loss": 73.4606, "step": 56150 }, { "epoch": 0.464573768457625, "grad_norm": 1087.544921875, "learning_rate": 6.6473297163301485e-06, "loss": 93.5026, "step": 56160 }, { "epoch": 0.46465649170699425, "grad_norm": 637.343505859375, "learning_rate": 6.645998028116455e-06, "loss": 106.3616, "step": 56170 }, { "epoch": 0.4647392149563635, "grad_norm": 1312.9207763671875, "learning_rate": 6.6446662089382545e-06, "loss": 126.186, "step": 56180 }, { "epoch": 0.4648219382057327, "grad_norm": 756.4004516601562, "learning_rate": 6.643334258901511e-06, "loss": 110.0223, "step": 56190 }, { "epoch": 0.46490466145510195, "grad_norm": 577.8167724609375, "learning_rate": 6.642002178112202e-06, "loss": 114.2335, "step": 56200 }, { "epoch": 0.4649873847044712, "grad_norm": 419.95404052734375, "learning_rate": 6.640669966676316e-06, "loss": 88.2521, "step": 56210 }, { "epoch": 0.4650701079538404, "grad_norm": 503.7681579589844, "learning_rate": 6.6393376246998485e-06, "loss": 105.7174, "step": 56220 }, { "epoch": 0.46515283120320966, "grad_norm": 721.5469360351562, "learning_rate": 6.638005152288811e-06, "loss": 100.881, "step": 56230 }, { "epoch": 0.4652355544525789, "grad_norm": 1773.4716796875, "learning_rate": 6.636672549549221e-06, "loss": 115.8908, "step": 56240 }, { "epoch": 0.4653182777019481, "grad_norm": 960.2298583984375, "learning_rate": 6.635339816587109e-06, "loss": 109.554, "step": 56250 }, { "epoch": 0.46540100095131737, "grad_norm": 1094.2969970703125, "learning_rate": 6.634006953508512e-06, "loss": 104.5612, "step": 56260 }, { "epoch": 0.4654837242006866, "grad_norm": 706.4091186523438, "learning_rate": 6.63267396041948e-06, "loss": 113.3086, "step": 56270 }, { "epoch": 0.4655664474500558, "grad_norm": 1204.162841796875, "learning_rate": 6.631340837426075e-06, "loss": 105.2585, "step": 56280 }, { "epoch": 0.4656491706994251, "grad_norm": 807.52734375, "learning_rate": 6.630007584634366e-06, "loss": 78.1581, "step": 56290 }, { "epoch": 0.46573189394879433, "grad_norm": 912.2439575195312, "learning_rate": 6.628674202150434e-06, "loss": 95.7974, "step": 56300 }, { "epoch": 0.4658146171981635, "grad_norm": 793.3104858398438, "learning_rate": 6.627340690080371e-06, "loss": 94.2195, "step": 56310 }, { "epoch": 0.4658973404475328, "grad_norm": 1065.8125, "learning_rate": 6.626007048530276e-06, "loss": 72.4793, "step": 56320 }, { "epoch": 0.46598006369690204, "grad_norm": 565.6997680664062, "learning_rate": 6.624673277606264e-06, "loss": 90.7239, "step": 56330 }, { "epoch": 0.46606278694627123, "grad_norm": 1088.072998046875, "learning_rate": 6.623339377414456e-06, "loss": 114.9387, "step": 56340 }, { "epoch": 0.4661455101956405, "grad_norm": 1036.712158203125, "learning_rate": 6.622005348060983e-06, "loss": 98.3773, "step": 56350 }, { "epoch": 0.46622823344500974, "grad_norm": 657.3320922851562, "learning_rate": 6.620671189651988e-06, "loss": 78.3256, "step": 56360 }, { "epoch": 0.46631095669437894, "grad_norm": 396.6316223144531, "learning_rate": 6.6193369022936245e-06, "loss": 111.291, "step": 56370 }, { "epoch": 0.4663936799437482, "grad_norm": 747.0289306640625, "learning_rate": 6.618002486092056e-06, "loss": 131.3509, "step": 56380 }, { "epoch": 0.46647640319311745, "grad_norm": 645.677978515625, "learning_rate": 6.616667941153456e-06, "loss": 112.265, "step": 56390 }, { "epoch": 0.46655912644248665, "grad_norm": 2745.71533203125, "learning_rate": 6.615333267584007e-06, "loss": 94.6054, "step": 56400 }, { "epoch": 0.4666418496918559, "grad_norm": 617.6024780273438, "learning_rate": 6.613998465489902e-06, "loss": 86.2714, "step": 56410 }, { "epoch": 0.46672457294122516, "grad_norm": 2982.6630859375, "learning_rate": 6.612663534977347e-06, "loss": 158.7063, "step": 56420 }, { "epoch": 0.46680729619059436, "grad_norm": 372.95379638671875, "learning_rate": 6.611328476152557e-06, "loss": 127.7486, "step": 56430 }, { "epoch": 0.4668900194399636, "grad_norm": 665.3734130859375, "learning_rate": 6.609993289121753e-06, "loss": 108.4631, "step": 56440 }, { "epoch": 0.4669727426893328, "grad_norm": 637.3652954101562, "learning_rate": 6.608657973991172e-06, "loss": 84.1843, "step": 56450 }, { "epoch": 0.46705546593870206, "grad_norm": 962.6121215820312, "learning_rate": 6.607322530867061e-06, "loss": 88.8814, "step": 56460 }, { "epoch": 0.4671381891880713, "grad_norm": 588.2682495117188, "learning_rate": 6.605986959855672e-06, "loss": 76.9025, "step": 56470 }, { "epoch": 0.4672209124374405, "grad_norm": 1287.7611083984375, "learning_rate": 6.60465126106327e-06, "loss": 120.8118, "step": 56480 }, { "epoch": 0.46730363568680977, "grad_norm": 932.3352661132812, "learning_rate": 6.6033154345961314e-06, "loss": 89.3703, "step": 56490 }, { "epoch": 0.467386358936179, "grad_norm": 577.4403076171875, "learning_rate": 6.601979480560543e-06, "loss": 94.8228, "step": 56500 }, { "epoch": 0.4674690821855482, "grad_norm": 937.298583984375, "learning_rate": 6.6006433990627985e-06, "loss": 122.7913, "step": 56510 }, { "epoch": 0.4675518054349175, "grad_norm": 588.9931030273438, "learning_rate": 6.599307190209206e-06, "loss": 112.8304, "step": 56520 }, { "epoch": 0.46763452868428673, "grad_norm": 1339.0936279296875, "learning_rate": 6.5979708541060796e-06, "loss": 144.9437, "step": 56530 }, { "epoch": 0.46771725193365593, "grad_norm": 1465.9853515625, "learning_rate": 6.596634390859745e-06, "loss": 91.1747, "step": 56540 }, { "epoch": 0.4677999751830252, "grad_norm": 890.4679565429688, "learning_rate": 6.59529780057654e-06, "loss": 82.902, "step": 56550 }, { "epoch": 0.46788269843239444, "grad_norm": 839.3494873046875, "learning_rate": 6.593961083362811e-06, "loss": 85.888, "step": 56560 }, { "epoch": 0.46796542168176364, "grad_norm": 992.9126586914062, "learning_rate": 6.592624239324914e-06, "loss": 109.3493, "step": 56570 }, { "epoch": 0.4680481449311329, "grad_norm": 1971.344482421875, "learning_rate": 6.591287268569215e-06, "loss": 99.3406, "step": 56580 }, { "epoch": 0.46813086818050215, "grad_norm": 1292.9296875, "learning_rate": 6.589950171202092e-06, "loss": 67.5184, "step": 56590 }, { "epoch": 0.46821359142987135, "grad_norm": 1166.9710693359375, "learning_rate": 6.588612947329929e-06, "loss": 121.0264, "step": 56600 }, { "epoch": 0.4682963146792406, "grad_norm": 1211.2210693359375, "learning_rate": 6.587275597059125e-06, "loss": 90.2953, "step": 56610 }, { "epoch": 0.46837903792860985, "grad_norm": 860.5660400390625, "learning_rate": 6.585938120496087e-06, "loss": 110.3769, "step": 56620 }, { "epoch": 0.46846176117797905, "grad_norm": 1788.8858642578125, "learning_rate": 6.584600517747232e-06, "loss": 111.8886, "step": 56630 }, { "epoch": 0.4685444844273483, "grad_norm": 826.596923828125, "learning_rate": 6.583262788918985e-06, "loss": 84.6638, "step": 56640 }, { "epoch": 0.46862720767671756, "grad_norm": 794.5626220703125, "learning_rate": 6.581924934117783e-06, "loss": 108.4789, "step": 56650 }, { "epoch": 0.46870993092608676, "grad_norm": 1118.61962890625, "learning_rate": 6.580586953450076e-06, "loss": 74.7545, "step": 56660 }, { "epoch": 0.468792654175456, "grad_norm": 763.598388671875, "learning_rate": 6.579248847022317e-06, "loss": 78.2781, "step": 56670 }, { "epoch": 0.46887537742482527, "grad_norm": 1182.76806640625, "learning_rate": 6.577910614940978e-06, "loss": 97.6059, "step": 56680 }, { "epoch": 0.46895810067419447, "grad_norm": 1287.8709716796875, "learning_rate": 6.576572257312531e-06, "loss": 94.5327, "step": 56690 }, { "epoch": 0.4690408239235637, "grad_norm": 625.0133666992188, "learning_rate": 6.5752337742434644e-06, "loss": 90.6583, "step": 56700 }, { "epoch": 0.469123547172933, "grad_norm": 352.9053649902344, "learning_rate": 6.573895165840276e-06, "loss": 99.1602, "step": 56710 }, { "epoch": 0.4692062704223022, "grad_norm": 1227.5992431640625, "learning_rate": 6.5725564322094745e-06, "loss": 106.9007, "step": 56720 }, { "epoch": 0.46928899367167143, "grad_norm": 1981.240234375, "learning_rate": 6.571217573457573e-06, "loss": 110.456, "step": 56730 }, { "epoch": 0.4693717169210407, "grad_norm": 915.96728515625, "learning_rate": 6.569878589691101e-06, "loss": 64.4055, "step": 56740 }, { "epoch": 0.4694544401704099, "grad_norm": 1044.962890625, "learning_rate": 6.568539481016593e-06, "loss": 108.6498, "step": 56750 }, { "epoch": 0.46953716341977914, "grad_norm": 1632.702880859375, "learning_rate": 6.567200247540599e-06, "loss": 123.9141, "step": 56760 }, { "epoch": 0.4696198866691484, "grad_norm": 1159.7938232421875, "learning_rate": 6.5658608893696714e-06, "loss": 105.7761, "step": 56770 }, { "epoch": 0.4697026099185176, "grad_norm": 1023.9876098632812, "learning_rate": 6.564521406610382e-06, "loss": 130.591, "step": 56780 }, { "epoch": 0.46978533316788684, "grad_norm": 1106.73779296875, "learning_rate": 6.563181799369301e-06, "loss": 89.8389, "step": 56790 }, { "epoch": 0.4698680564172561, "grad_norm": 636.095458984375, "learning_rate": 6.561842067753021e-06, "loss": 95.9526, "step": 56800 }, { "epoch": 0.4699507796666253, "grad_norm": 725.4862670898438, "learning_rate": 6.560502211868135e-06, "loss": 71.1143, "step": 56810 }, { "epoch": 0.47003350291599455, "grad_norm": 946.0494384765625, "learning_rate": 6.55916223182125e-06, "loss": 106.9812, "step": 56820 }, { "epoch": 0.47011622616536375, "grad_norm": 1510.7655029296875, "learning_rate": 6.55782212771898e-06, "loss": 147.8765, "step": 56830 }, { "epoch": 0.470198949414733, "grad_norm": 1354.79296875, "learning_rate": 6.5564818996679536e-06, "loss": 121.1624, "step": 56840 }, { "epoch": 0.47028167266410226, "grad_norm": 1657.0260009765625, "learning_rate": 6.555141547774807e-06, "loss": 153.1369, "step": 56850 }, { "epoch": 0.47036439591347146, "grad_norm": 432.9583435058594, "learning_rate": 6.553801072146184e-06, "loss": 112.8747, "step": 56860 }, { "epoch": 0.4704471191628407, "grad_norm": 1424.265380859375, "learning_rate": 6.55246047288874e-06, "loss": 102.5585, "step": 56870 }, { "epoch": 0.47052984241220996, "grad_norm": 842.8648071289062, "learning_rate": 6.551119750109142e-06, "loss": 95.888, "step": 56880 }, { "epoch": 0.47061256566157916, "grad_norm": 1254.891845703125, "learning_rate": 6.5497789039140635e-06, "loss": 88.7369, "step": 56890 }, { "epoch": 0.4706952889109484, "grad_norm": 593.7796020507812, "learning_rate": 6.54843793441019e-06, "loss": 117.424, "step": 56900 }, { "epoch": 0.47077801216031767, "grad_norm": 5807.5322265625, "learning_rate": 6.547096841704217e-06, "loss": 123.5693, "step": 56910 }, { "epoch": 0.47086073540968687, "grad_norm": 698.7401733398438, "learning_rate": 6.545755625902848e-06, "loss": 108.5493, "step": 56920 }, { "epoch": 0.4709434586590561, "grad_norm": 770.432373046875, "learning_rate": 6.544414287112798e-06, "loss": 60.7358, "step": 56930 }, { "epoch": 0.4710261819084254, "grad_norm": 772.1907348632812, "learning_rate": 6.54307282544079e-06, "loss": 90.2566, "step": 56940 }, { "epoch": 0.4711089051577946, "grad_norm": 543.7560424804688, "learning_rate": 6.5417312409935606e-06, "loss": 74.9508, "step": 56950 }, { "epoch": 0.47119162840716383, "grad_norm": 605.939697265625, "learning_rate": 6.540389533877852e-06, "loss": 117.9458, "step": 56960 }, { "epoch": 0.4712743516565331, "grad_norm": 546.9862670898438, "learning_rate": 6.539047704200417e-06, "loss": 83.1111, "step": 56970 }, { "epoch": 0.4713570749059023, "grad_norm": 860.575439453125, "learning_rate": 6.53770575206802e-06, "loss": 102.208, "step": 56980 }, { "epoch": 0.47143979815527154, "grad_norm": 616.0123291015625, "learning_rate": 6.536363677587433e-06, "loss": 101.8752, "step": 56990 }, { "epoch": 0.4715225214046408, "grad_norm": 912.4677124023438, "learning_rate": 6.535021480865439e-06, "loss": 94.7414, "step": 57000 }, { "epoch": 0.47160524465401, "grad_norm": 1033.161376953125, "learning_rate": 6.5336791620088306e-06, "loss": 98.8203, "step": 57010 }, { "epoch": 0.47168796790337925, "grad_norm": 813.3025512695312, "learning_rate": 6.53233672112441e-06, "loss": 111.74, "step": 57020 }, { "epoch": 0.4717706911527485, "grad_norm": 1582.189208984375, "learning_rate": 6.530994158318988e-06, "loss": 113.1147, "step": 57030 }, { "epoch": 0.4718534144021177, "grad_norm": 862.9869384765625, "learning_rate": 6.529651473699389e-06, "loss": 85.5126, "step": 57040 }, { "epoch": 0.47193613765148695, "grad_norm": 1320.765869140625, "learning_rate": 6.528308667372441e-06, "loss": 101.8769, "step": 57050 }, { "epoch": 0.4720188609008562, "grad_norm": 717.2901611328125, "learning_rate": 6.526965739444988e-06, "loss": 119.4057, "step": 57060 }, { "epoch": 0.4721015841502254, "grad_norm": 699.390625, "learning_rate": 6.525622690023878e-06, "loss": 105.9801, "step": 57070 }, { "epoch": 0.47218430739959466, "grad_norm": 1108.2218017578125, "learning_rate": 6.524279519215972e-06, "loss": 105.0386, "step": 57080 }, { "epoch": 0.4722670306489639, "grad_norm": 1312.4166259765625, "learning_rate": 6.522936227128139e-06, "loss": 116.3358, "step": 57090 }, { "epoch": 0.4723497538983331, "grad_norm": 881.8489379882812, "learning_rate": 6.521592813867261e-06, "loss": 115.962, "step": 57100 }, { "epoch": 0.47243247714770237, "grad_norm": 1025.9259033203125, "learning_rate": 6.520249279540227e-06, "loss": 112.2708, "step": 57110 }, { "epoch": 0.4725152003970716, "grad_norm": 968.8225708007812, "learning_rate": 6.5189056242539325e-06, "loss": 81.8784, "step": 57120 }, { "epoch": 0.4725979236464408, "grad_norm": 776.3606567382812, "learning_rate": 6.51756184811529e-06, "loss": 97.869, "step": 57130 }, { "epoch": 0.4726806468958101, "grad_norm": 1188.362060546875, "learning_rate": 6.516217951231215e-06, "loss": 78.5015, "step": 57140 }, { "epoch": 0.47276337014517933, "grad_norm": 1396.0478515625, "learning_rate": 6.514873933708637e-06, "loss": 115.7227, "step": 57150 }, { "epoch": 0.47284609339454853, "grad_norm": 701.3724365234375, "learning_rate": 6.513529795654493e-06, "loss": 91.0152, "step": 57160 }, { "epoch": 0.4729288166439178, "grad_norm": 773.8468627929688, "learning_rate": 6.512185537175727e-06, "loss": 140.7189, "step": 57170 }, { "epoch": 0.473011539893287, "grad_norm": 948.971923828125, "learning_rate": 6.5108411583793e-06, "loss": 163.1197, "step": 57180 }, { "epoch": 0.47309426314265624, "grad_norm": 662.3345336914062, "learning_rate": 6.509496659372175e-06, "loss": 92.557, "step": 57190 }, { "epoch": 0.4731769863920255, "grad_norm": 902.59521484375, "learning_rate": 6.508152040261329e-06, "loss": 98.1511, "step": 57200 }, { "epoch": 0.4732597096413947, "grad_norm": 912.7888793945312, "learning_rate": 6.506807301153746e-06, "loss": 82.9847, "step": 57210 }, { "epoch": 0.47334243289076394, "grad_norm": 711.9552001953125, "learning_rate": 6.5054624421564204e-06, "loss": 85.3893, "step": 57220 }, { "epoch": 0.4734251561401332, "grad_norm": 996.9633178710938, "learning_rate": 6.504117463376358e-06, "loss": 103.8014, "step": 57230 }, { "epoch": 0.4735078793895024, "grad_norm": 889.486083984375, "learning_rate": 6.502772364920573e-06, "loss": 119.4068, "step": 57240 }, { "epoch": 0.47359060263887165, "grad_norm": 1359.7982177734375, "learning_rate": 6.501427146896087e-06, "loss": 131.6854, "step": 57250 }, { "epoch": 0.4736733258882409, "grad_norm": 863.1353759765625, "learning_rate": 6.5000818094099345e-06, "loss": 125.5572, "step": 57260 }, { "epoch": 0.4737560491376101, "grad_norm": 583.1968383789062, "learning_rate": 6.498736352569155e-06, "loss": 97.2687, "step": 57270 }, { "epoch": 0.47383877238697936, "grad_norm": 613.0692138671875, "learning_rate": 6.497390776480804e-06, "loss": 83.3367, "step": 57280 }, { "epoch": 0.4739214956363486, "grad_norm": 894.8236083984375, "learning_rate": 6.49604508125194e-06, "loss": 123.6242, "step": 57290 }, { "epoch": 0.4740042188857178, "grad_norm": 1123.2113037109375, "learning_rate": 6.4946992669896355e-06, "loss": 90.5414, "step": 57300 }, { "epoch": 0.47408694213508706, "grad_norm": 634.5689086914062, "learning_rate": 6.493353333800969e-06, "loss": 84.9406, "step": 57310 }, { "epoch": 0.4741696653844563, "grad_norm": 618.6438598632812, "learning_rate": 6.492007281793032e-06, "loss": 101.8569, "step": 57320 }, { "epoch": 0.4742523886338255, "grad_norm": 760.6466064453125, "learning_rate": 6.490661111072923e-06, "loss": 98.2763, "step": 57330 }, { "epoch": 0.47433511188319477, "grad_norm": 642.0409545898438, "learning_rate": 6.489314821747751e-06, "loss": 82.7239, "step": 57340 }, { "epoch": 0.474417835132564, "grad_norm": 491.7732849121094, "learning_rate": 6.487968413924634e-06, "loss": 82.5276, "step": 57350 }, { "epoch": 0.4745005583819332, "grad_norm": 847.6077270507812, "learning_rate": 6.486621887710698e-06, "loss": 85.0193, "step": 57360 }, { "epoch": 0.4745832816313025, "grad_norm": 1501.081787109375, "learning_rate": 6.485275243213081e-06, "loss": 88.0962, "step": 57370 }, { "epoch": 0.47466600488067173, "grad_norm": 961.3021850585938, "learning_rate": 6.4839284805389305e-06, "loss": 131.3477, "step": 57380 }, { "epoch": 0.47474872813004093, "grad_norm": 695.2109985351562, "learning_rate": 6.4825815997954e-06, "loss": 99.5357, "step": 57390 }, { "epoch": 0.4748314513794102, "grad_norm": 478.3403625488281, "learning_rate": 6.481234601089655e-06, "loss": 96.97, "step": 57400 }, { "epoch": 0.47491417462877944, "grad_norm": 967.1907348632812, "learning_rate": 6.4798874845288725e-06, "loss": 84.3332, "step": 57410 }, { "epoch": 0.47499689787814864, "grad_norm": 789.9042358398438, "learning_rate": 6.4785402502202345e-06, "loss": 98.0186, "step": 57420 }, { "epoch": 0.4750796211275179, "grad_norm": 515.9044799804688, "learning_rate": 6.477192898270934e-06, "loss": 98.3447, "step": 57430 }, { "epoch": 0.47516234437688715, "grad_norm": 777.9580078125, "learning_rate": 6.475845428788173e-06, "loss": 92.8213, "step": 57440 }, { "epoch": 0.47524506762625635, "grad_norm": 732.5836791992188, "learning_rate": 6.474497841879166e-06, "loss": 100.1301, "step": 57450 }, { "epoch": 0.4753277908756256, "grad_norm": 905.1466064453125, "learning_rate": 6.473150137651132e-06, "loss": 87.0629, "step": 57460 }, { "epoch": 0.47541051412499485, "grad_norm": 1096.08935546875, "learning_rate": 6.471802316211302e-06, "loss": 94.4893, "step": 57470 }, { "epoch": 0.47549323737436405, "grad_norm": 781.8897094726562, "learning_rate": 6.4704543776669174e-06, "loss": 85.6178, "step": 57480 }, { "epoch": 0.4755759606237333, "grad_norm": 1561.7882080078125, "learning_rate": 6.469106322125227e-06, "loss": 110.5366, "step": 57490 }, { "epoch": 0.47565868387310256, "grad_norm": 773.6251220703125, "learning_rate": 6.467758149693486e-06, "loss": 94.7606, "step": 57500 }, { "epoch": 0.47574140712247176, "grad_norm": 696.1630249023438, "learning_rate": 6.466409860478967e-06, "loss": 84.5489, "step": 57510 }, { "epoch": 0.475824130371841, "grad_norm": 1580.1494140625, "learning_rate": 6.465061454588946e-06, "loss": 114.6306, "step": 57520 }, { "epoch": 0.47590685362121027, "grad_norm": 584.140380859375, "learning_rate": 6.463712932130708e-06, "loss": 91.1199, "step": 57530 }, { "epoch": 0.47598957687057947, "grad_norm": 678.8070068359375, "learning_rate": 6.462364293211549e-06, "loss": 80.3412, "step": 57540 }, { "epoch": 0.4760723001199487, "grad_norm": 671.9398803710938, "learning_rate": 6.4610155379387755e-06, "loss": 116.5642, "step": 57550 }, { "epoch": 0.4761550233693179, "grad_norm": 1506.737060546875, "learning_rate": 6.459666666419699e-06, "loss": 79.6158, "step": 57560 }, { "epoch": 0.4762377466186872, "grad_norm": 695.2562255859375, "learning_rate": 6.4583176787616466e-06, "loss": 61.0726, "step": 57570 }, { "epoch": 0.47632046986805643, "grad_norm": 1047.5999755859375, "learning_rate": 6.456968575071951e-06, "loss": 115.039, "step": 57580 }, { "epoch": 0.4764031931174256, "grad_norm": 865.7882690429688, "learning_rate": 6.45561935545795e-06, "loss": 111.2492, "step": 57590 }, { "epoch": 0.4764859163667949, "grad_norm": 991.0267333984375, "learning_rate": 6.454270020026996e-06, "loss": 88.2671, "step": 57600 }, { "epoch": 0.47656863961616414, "grad_norm": 985.0994873046875, "learning_rate": 6.452920568886452e-06, "loss": 101.4334, "step": 57610 }, { "epoch": 0.47665136286553333, "grad_norm": 621.8745727539062, "learning_rate": 6.451571002143687e-06, "loss": 80.6865, "step": 57620 }, { "epoch": 0.4767340861149026, "grad_norm": 851.007080078125, "learning_rate": 6.450221319906079e-06, "loss": 93.8453, "step": 57630 }, { "epoch": 0.47681680936427184, "grad_norm": 1132.76708984375, "learning_rate": 6.448871522281016e-06, "loss": 85.5419, "step": 57640 }, { "epoch": 0.47689953261364104, "grad_norm": 739.5919189453125, "learning_rate": 6.447521609375894e-06, "loss": 67.1973, "step": 57650 }, { "epoch": 0.4769822558630103, "grad_norm": 829.6648559570312, "learning_rate": 6.446171581298123e-06, "loss": 106.6791, "step": 57660 }, { "epoch": 0.47706497911237955, "grad_norm": 1077.7239990234375, "learning_rate": 6.444821438155115e-06, "loss": 80.1578, "step": 57670 }, { "epoch": 0.47714770236174875, "grad_norm": 654.5147094726562, "learning_rate": 6.443471180054297e-06, "loss": 69.1088, "step": 57680 }, { "epoch": 0.477230425611118, "grad_norm": 1064.0953369140625, "learning_rate": 6.442120807103102e-06, "loss": 96.6103, "step": 57690 }, { "epoch": 0.47731314886048726, "grad_norm": 846.7904663085938, "learning_rate": 6.440770319408971e-06, "loss": 113.4604, "step": 57700 }, { "epoch": 0.47739587210985646, "grad_norm": 1160.3079833984375, "learning_rate": 6.43941971707936e-06, "loss": 110.0301, "step": 57710 }, { "epoch": 0.4774785953592257, "grad_norm": 524.7237548828125, "learning_rate": 6.438069000221727e-06, "loss": 92.4454, "step": 57720 }, { "epoch": 0.47756131860859496, "grad_norm": 363.4929504394531, "learning_rate": 6.4367181689435434e-06, "loss": 107.8367, "step": 57730 }, { "epoch": 0.47764404185796416, "grad_norm": 953.7645263671875, "learning_rate": 6.435367223352289e-06, "loss": 121.483, "step": 57740 }, { "epoch": 0.4777267651073334, "grad_norm": 1026.001708984375, "learning_rate": 6.434016163555452e-06, "loss": 91.6397, "step": 57750 }, { "epoch": 0.47780948835670267, "grad_norm": 811.3062133789062, "learning_rate": 6.432664989660531e-06, "loss": 91.4573, "step": 57760 }, { "epoch": 0.47789221160607187, "grad_norm": 1303.14111328125, "learning_rate": 6.43131370177503e-06, "loss": 112.111, "step": 57770 }, { "epoch": 0.4779749348554411, "grad_norm": 754.6259765625, "learning_rate": 6.429962300006468e-06, "loss": 100.0983, "step": 57780 }, { "epoch": 0.4780576581048104, "grad_norm": 738.0693359375, "learning_rate": 6.428610784462368e-06, "loss": 79.652, "step": 57790 }, { "epoch": 0.4781403813541796, "grad_norm": 1683.4595947265625, "learning_rate": 6.427259155250265e-06, "loss": 95.6643, "step": 57800 }, { "epoch": 0.47822310460354883, "grad_norm": 840.048828125, "learning_rate": 6.4259074124777e-06, "loss": 93.0407, "step": 57810 }, { "epoch": 0.4783058278529181, "grad_norm": 849.6332397460938, "learning_rate": 6.4245555562522265e-06, "loss": 98.129, "step": 57820 }, { "epoch": 0.4783885511022873, "grad_norm": 495.7574768066406, "learning_rate": 6.423203586681406e-06, "loss": 87.5308, "step": 57830 }, { "epoch": 0.47847127435165654, "grad_norm": 998.4563598632812, "learning_rate": 6.421851503872807e-06, "loss": 124.0158, "step": 57840 }, { "epoch": 0.4785539976010258, "grad_norm": 636.4727783203125, "learning_rate": 6.42049930793401e-06, "loss": 74.8035, "step": 57850 }, { "epoch": 0.478636720850395, "grad_norm": 872.4989013671875, "learning_rate": 6.419146998972602e-06, "loss": 78.3126, "step": 57860 }, { "epoch": 0.47871944409976425, "grad_norm": 1358.409912109375, "learning_rate": 6.417794577096179e-06, "loss": 98.5134, "step": 57870 }, { "epoch": 0.4788021673491335, "grad_norm": 1421.047119140625, "learning_rate": 6.41644204241235e-06, "loss": 119.0474, "step": 57880 }, { "epoch": 0.4788848905985027, "grad_norm": 750.8233642578125, "learning_rate": 6.4150893950287275e-06, "loss": 108.5803, "step": 57890 }, { "epoch": 0.47896761384787195, "grad_norm": 2111.186767578125, "learning_rate": 6.413736635052936e-06, "loss": 141.4099, "step": 57900 }, { "epoch": 0.47905033709724115, "grad_norm": 681.4733276367188, "learning_rate": 6.41238376259261e-06, "loss": 63.5274, "step": 57910 }, { "epoch": 0.4791330603466104, "grad_norm": 805.0604248046875, "learning_rate": 6.411030777755389e-06, "loss": 98.8302, "step": 57920 }, { "epoch": 0.47921578359597966, "grad_norm": 810.17041015625, "learning_rate": 6.409677680648925e-06, "loss": 113.7874, "step": 57930 }, { "epoch": 0.47929850684534886, "grad_norm": 827.2803344726562, "learning_rate": 6.4083244713808765e-06, "loss": 119.2214, "step": 57940 }, { "epoch": 0.4793812300947181, "grad_norm": 1362.2303466796875, "learning_rate": 6.406971150058914e-06, "loss": 92.4825, "step": 57950 }, { "epoch": 0.47946395334408737, "grad_norm": 855.9775390625, "learning_rate": 6.405617716790714e-06, "loss": 79.9808, "step": 57960 }, { "epoch": 0.47954667659345657, "grad_norm": 632.9134521484375, "learning_rate": 6.404264171683965e-06, "loss": 87.7965, "step": 57970 }, { "epoch": 0.4796293998428258, "grad_norm": 731.3795166015625, "learning_rate": 6.402910514846358e-06, "loss": 82.6081, "step": 57980 }, { "epoch": 0.4797121230921951, "grad_norm": 498.25, "learning_rate": 6.4015567463856e-06, "loss": 82.2082, "step": 57990 }, { "epoch": 0.4797948463415643, "grad_norm": 1057.318359375, "learning_rate": 6.400202866409405e-06, "loss": 111.3383, "step": 58000 }, { "epoch": 0.47987756959093353, "grad_norm": 775.5044555664062, "learning_rate": 6.398848875025494e-06, "loss": 88.3121, "step": 58010 }, { "epoch": 0.4799602928403028, "grad_norm": 1417.432373046875, "learning_rate": 6.3974947723415985e-06, "loss": 95.6965, "step": 58020 }, { "epoch": 0.480043016089672, "grad_norm": 644.7517700195312, "learning_rate": 6.396140558465456e-06, "loss": 104.0072, "step": 58030 }, { "epoch": 0.48012573933904124, "grad_norm": 849.2105712890625, "learning_rate": 6.394786233504816e-06, "loss": 86.352, "step": 58040 }, { "epoch": 0.4802084625884105, "grad_norm": 824.1756591796875, "learning_rate": 6.39343179756744e-06, "loss": 114.2667, "step": 58050 }, { "epoch": 0.4802911858377797, "grad_norm": 567.4685668945312, "learning_rate": 6.392077250761088e-06, "loss": 88.4801, "step": 58060 }, { "epoch": 0.48037390908714894, "grad_norm": 910.6168823242188, "learning_rate": 6.390722593193538e-06, "loss": 85.6822, "step": 58070 }, { "epoch": 0.4804566323365182, "grad_norm": 924.6448364257812, "learning_rate": 6.389367824972575e-06, "loss": 96.7753, "step": 58080 }, { "epoch": 0.4805393555858874, "grad_norm": 814.3735961914062, "learning_rate": 6.388012946205991e-06, "loss": 90.9101, "step": 58090 }, { "epoch": 0.48062207883525665, "grad_norm": 1433.7420654296875, "learning_rate": 6.386657957001585e-06, "loss": 105.3125, "step": 58100 }, { "epoch": 0.4807048020846259, "grad_norm": 841.0825805664062, "learning_rate": 6.38530285746717e-06, "loss": 128.7726, "step": 58110 }, { "epoch": 0.4807875253339951, "grad_norm": 1228.2952880859375, "learning_rate": 6.383947647710565e-06, "loss": 114.0045, "step": 58120 }, { "epoch": 0.48087024858336436, "grad_norm": 716.3721313476562, "learning_rate": 6.382592327839596e-06, "loss": 96.0517, "step": 58130 }, { "epoch": 0.4809529718327336, "grad_norm": 837.0059204101562, "learning_rate": 6.381236897962102e-06, "loss": 112.9432, "step": 58140 }, { "epoch": 0.4810356950821028, "grad_norm": 1200.560302734375, "learning_rate": 6.379881358185926e-06, "loss": 126.8512, "step": 58150 }, { "epoch": 0.48111841833147206, "grad_norm": 772.40478515625, "learning_rate": 6.378525708618924e-06, "loss": 75.8569, "step": 58160 }, { "epoch": 0.4812011415808413, "grad_norm": 980.9589233398438, "learning_rate": 6.377169949368956e-06, "loss": 83.9632, "step": 58170 }, { "epoch": 0.4812838648302105, "grad_norm": 647.2731323242188, "learning_rate": 6.375814080543899e-06, "loss": 104.6065, "step": 58180 }, { "epoch": 0.48136658807957977, "grad_norm": 1349.8760986328125, "learning_rate": 6.3744581022516285e-06, "loss": 82.4153, "step": 58190 }, { "epoch": 0.481449311328949, "grad_norm": 1214.870361328125, "learning_rate": 6.373102014600033e-06, "loss": 106.5302, "step": 58200 }, { "epoch": 0.4815320345783182, "grad_norm": 1136.8599853515625, "learning_rate": 6.371745817697012e-06, "loss": 110.1129, "step": 58210 }, { "epoch": 0.4816147578276875, "grad_norm": 2490.23876953125, "learning_rate": 6.370389511650474e-06, "loss": 138.5235, "step": 58220 }, { "epoch": 0.48169748107705673, "grad_norm": 851.6533203125, "learning_rate": 6.3690330965683304e-06, "loss": 105.1765, "step": 58230 }, { "epoch": 0.48178020432642593, "grad_norm": 635.8304443359375, "learning_rate": 6.367676572558506e-06, "loss": 138.1114, "step": 58240 }, { "epoch": 0.4818629275757952, "grad_norm": 747.5317993164062, "learning_rate": 6.366319939728934e-06, "loss": 86.2269, "step": 58250 }, { "epoch": 0.48194565082516444, "grad_norm": 1157.3438720703125, "learning_rate": 6.364963198187555e-06, "loss": 81.2648, "step": 58260 }, { "epoch": 0.48202837407453364, "grad_norm": 961.3770141601562, "learning_rate": 6.363606348042318e-06, "loss": 69.4496, "step": 58270 }, { "epoch": 0.4821110973239029, "grad_norm": 823.779541015625, "learning_rate": 6.362249389401183e-06, "loss": 96.8622, "step": 58280 }, { "epoch": 0.4821938205732721, "grad_norm": 510.96942138671875, "learning_rate": 6.360892322372115e-06, "loss": 77.7201, "step": 58290 }, { "epoch": 0.48227654382264135, "grad_norm": 745.046875, "learning_rate": 6.359535147063092e-06, "loss": 81.9999, "step": 58300 }, { "epoch": 0.4823592670720106, "grad_norm": 611.5297241210938, "learning_rate": 6.358177863582095e-06, "loss": 87.0968, "step": 58310 }, { "epoch": 0.4824419903213798, "grad_norm": 860.2018432617188, "learning_rate": 6.35682047203712e-06, "loss": 81.5001, "step": 58320 }, { "epoch": 0.48252471357074905, "grad_norm": 439.9252014160156, "learning_rate": 6.355462972536166e-06, "loss": 88.4102, "step": 58330 }, { "epoch": 0.4826074368201183, "grad_norm": 922.5792846679688, "learning_rate": 6.354105365187244e-06, "loss": 91.7119, "step": 58340 }, { "epoch": 0.4826901600694875, "grad_norm": 1041.380615234375, "learning_rate": 6.352747650098373e-06, "loss": 104.9556, "step": 58350 }, { "epoch": 0.48277288331885676, "grad_norm": 902.4854125976562, "learning_rate": 6.35138982737758e-06, "loss": 104.9955, "step": 58360 }, { "epoch": 0.482855606568226, "grad_norm": 856.4240112304688, "learning_rate": 6.3500318971329e-06, "loss": 68.9174, "step": 58370 }, { "epoch": 0.4829383298175952, "grad_norm": 613.3790893554688, "learning_rate": 6.348673859472378e-06, "loss": 104.4474, "step": 58380 }, { "epoch": 0.48302105306696447, "grad_norm": 625.4129028320312, "learning_rate": 6.347315714504066e-06, "loss": 104.9294, "step": 58390 }, { "epoch": 0.4831037763163337, "grad_norm": 597.6600341796875, "learning_rate": 6.345957462336026e-06, "loss": 81.4773, "step": 58400 }, { "epoch": 0.4831864995657029, "grad_norm": 531.10986328125, "learning_rate": 6.344599103076329e-06, "loss": 100.6717, "step": 58410 }, { "epoch": 0.4832692228150722, "grad_norm": 812.7492065429688, "learning_rate": 6.343240636833051e-06, "loss": 108.6641, "step": 58420 }, { "epoch": 0.48335194606444143, "grad_norm": 1158.002685546875, "learning_rate": 6.341882063714282e-06, "loss": 125.5026, "step": 58430 }, { "epoch": 0.4834346693138106, "grad_norm": 846.1316528320312, "learning_rate": 6.340523383828115e-06, "loss": 107.7689, "step": 58440 }, { "epoch": 0.4835173925631799, "grad_norm": 1098.09130859375, "learning_rate": 6.339164597282652e-06, "loss": 106.0673, "step": 58450 }, { "epoch": 0.48360011581254914, "grad_norm": 925.63818359375, "learning_rate": 6.337805704186011e-06, "loss": 118.7983, "step": 58460 }, { "epoch": 0.48368283906191833, "grad_norm": 558.7349853515625, "learning_rate": 6.336446704646307e-06, "loss": 105.5596, "step": 58470 }, { "epoch": 0.4837655623112876, "grad_norm": 918.0574340820312, "learning_rate": 6.335087598771676e-06, "loss": 102.35, "step": 58480 }, { "epoch": 0.48384828556065684, "grad_norm": 2873.980712890625, "learning_rate": 6.333728386670249e-06, "loss": 102.2267, "step": 58490 }, { "epoch": 0.48393100881002604, "grad_norm": 740.7039794921875, "learning_rate": 6.332369068450175e-06, "loss": 92.0805, "step": 58500 }, { "epoch": 0.4840137320593953, "grad_norm": 1080.8912353515625, "learning_rate": 6.33100964421961e-06, "loss": 99.742, "step": 58510 }, { "epoch": 0.48409645530876455, "grad_norm": 1029.510009765625, "learning_rate": 6.329650114086717e-06, "loss": 77.3601, "step": 58520 }, { "epoch": 0.48417917855813375, "grad_norm": 739.2198486328125, "learning_rate": 6.328290478159666e-06, "loss": 108.9343, "step": 58530 }, { "epoch": 0.484261901807503, "grad_norm": 584.7896118164062, "learning_rate": 6.326930736546637e-06, "loss": 87.4123, "step": 58540 }, { "epoch": 0.48434462505687226, "grad_norm": 1001.3221435546875, "learning_rate": 6.325570889355819e-06, "loss": 115.3958, "step": 58550 }, { "epoch": 0.48442734830624146, "grad_norm": 0.0, "learning_rate": 6.32421093669541e-06, "loss": 100.6869, "step": 58560 }, { "epoch": 0.4845100715556107, "grad_norm": 1452.001953125, "learning_rate": 6.322850878673614e-06, "loss": 127.1429, "step": 58570 }, { "epoch": 0.48459279480497996, "grad_norm": 841.3821411132812, "learning_rate": 6.321490715398644e-06, "loss": 94.9818, "step": 58580 }, { "epoch": 0.48467551805434916, "grad_norm": 813.81298828125, "learning_rate": 6.320130446978722e-06, "loss": 76.4308, "step": 58590 }, { "epoch": 0.4847582413037184, "grad_norm": 1246.0950927734375, "learning_rate": 6.31877007352208e-06, "loss": 111.9752, "step": 58600 }, { "epoch": 0.48484096455308767, "grad_norm": 822.40283203125, "learning_rate": 6.317409595136956e-06, "loss": 90.058, "step": 58610 }, { "epoch": 0.48492368780245687, "grad_norm": 1240.24609375, "learning_rate": 6.316049011931595e-06, "loss": 118.0982, "step": 58620 }, { "epoch": 0.4850064110518261, "grad_norm": 710.9925537109375, "learning_rate": 6.314688324014255e-06, "loss": 99.1123, "step": 58630 }, { "epoch": 0.4850891343011953, "grad_norm": 1035.0576171875, "learning_rate": 6.3133275314931995e-06, "loss": 123.4674, "step": 58640 }, { "epoch": 0.4851718575505646, "grad_norm": 811.2945556640625, "learning_rate": 6.311966634476698e-06, "loss": 93.8845, "step": 58650 }, { "epoch": 0.48525458079993383, "grad_norm": 568.3858642578125, "learning_rate": 6.3106056330730335e-06, "loss": 96.4744, "step": 58660 }, { "epoch": 0.48533730404930303, "grad_norm": 682.4791259765625, "learning_rate": 6.309244527390493e-06, "loss": 96.0774, "step": 58670 }, { "epoch": 0.4854200272986723, "grad_norm": 1391.8082275390625, "learning_rate": 6.307883317537375e-06, "loss": 113.0187, "step": 58680 }, { "epoch": 0.48550275054804154, "grad_norm": 672.600830078125, "learning_rate": 6.306522003621983e-06, "loss": 79.8365, "step": 58690 }, { "epoch": 0.48558547379741074, "grad_norm": 1114.2835693359375, "learning_rate": 6.305160585752632e-06, "loss": 116.9864, "step": 58700 }, { "epoch": 0.48566819704678, "grad_norm": 904.7263793945312, "learning_rate": 6.303799064037643e-06, "loss": 95.5957, "step": 58710 }, { "epoch": 0.48575092029614925, "grad_norm": 1291.3994140625, "learning_rate": 6.302437438585345e-06, "loss": 107.933, "step": 58720 }, { "epoch": 0.48583364354551845, "grad_norm": 1033.6456298828125, "learning_rate": 6.301075709504077e-06, "loss": 132.5331, "step": 58730 }, { "epoch": 0.4859163667948877, "grad_norm": 939.7048950195312, "learning_rate": 6.299713876902188e-06, "loss": 93.3385, "step": 58740 }, { "epoch": 0.48599909004425695, "grad_norm": 1379.9310302734375, "learning_rate": 6.29835194088803e-06, "loss": 93.6808, "step": 58750 }, { "epoch": 0.48608181329362615, "grad_norm": 850.0859375, "learning_rate": 6.296989901569966e-06, "loss": 106.2699, "step": 58760 }, { "epoch": 0.4861645365429954, "grad_norm": 845.896728515625, "learning_rate": 6.295627759056368e-06, "loss": 97.0875, "step": 58770 }, { "epoch": 0.48624725979236466, "grad_norm": 910.60302734375, "learning_rate": 6.294265513455616e-06, "loss": 107.4305, "step": 58780 }, { "epoch": 0.48632998304173386, "grad_norm": 895.4052734375, "learning_rate": 6.292903164876097e-06, "loss": 99.333, "step": 58790 }, { "epoch": 0.4864127062911031, "grad_norm": 443.9016418457031, "learning_rate": 6.291540713426206e-06, "loss": 76.0029, "step": 58800 }, { "epoch": 0.48649542954047237, "grad_norm": 621.6075439453125, "learning_rate": 6.290178159214349e-06, "loss": 88.676, "step": 58810 }, { "epoch": 0.48657815278984157, "grad_norm": 459.60284423828125, "learning_rate": 6.288815502348935e-06, "loss": 94.1174, "step": 58820 }, { "epoch": 0.4866608760392108, "grad_norm": 1254.715576171875, "learning_rate": 6.287452742938388e-06, "loss": 91.2785, "step": 58830 }, { "epoch": 0.4867435992885801, "grad_norm": 1114.0794677734375, "learning_rate": 6.286089881091134e-06, "loss": 139.8125, "step": 58840 }, { "epoch": 0.4868263225379493, "grad_norm": 853.3452758789062, "learning_rate": 6.284726916915611e-06, "loss": 101.1512, "step": 58850 }, { "epoch": 0.48690904578731853, "grad_norm": 741.5625610351562, "learning_rate": 6.2833638505202635e-06, "loss": 115.4677, "step": 58860 }, { "epoch": 0.4869917690366878, "grad_norm": 1342.6466064453125, "learning_rate": 6.282000682013545e-06, "loss": 93.9246, "step": 58870 }, { "epoch": 0.487074492286057, "grad_norm": 594.8483276367188, "learning_rate": 6.280637411503913e-06, "loss": 108.2251, "step": 58880 }, { "epoch": 0.48715721553542624, "grad_norm": 899.0263061523438, "learning_rate": 6.279274039099842e-06, "loss": 93.0218, "step": 58890 }, { "epoch": 0.4872399387847955, "grad_norm": 858.6878662109375, "learning_rate": 6.277910564909806e-06, "loss": 91.2543, "step": 58900 }, { "epoch": 0.4873226620341647, "grad_norm": 974.9313354492188, "learning_rate": 6.276546989042292e-06, "loss": 91.2285, "step": 58910 }, { "epoch": 0.48740538528353394, "grad_norm": 653.6936645507812, "learning_rate": 6.275183311605793e-06, "loss": 101.074, "step": 58920 }, { "epoch": 0.4874881085329032, "grad_norm": 816.3530883789062, "learning_rate": 6.273819532708807e-06, "loss": 94.144, "step": 58930 }, { "epoch": 0.4875708317822724, "grad_norm": 1098.2479248046875, "learning_rate": 6.27245565245985e-06, "loss": 104.6022, "step": 58940 }, { "epoch": 0.48765355503164165, "grad_norm": 1253.642333984375, "learning_rate": 6.271091670967437e-06, "loss": 101.674, "step": 58950 }, { "epoch": 0.4877362782810109, "grad_norm": 577.8956909179688, "learning_rate": 6.269727588340091e-06, "loss": 82.5646, "step": 58960 }, { "epoch": 0.4878190015303801, "grad_norm": 667.8779296875, "learning_rate": 6.268363404686348e-06, "loss": 116.7945, "step": 58970 }, { "epoch": 0.48790172477974936, "grad_norm": 716.1468505859375, "learning_rate": 6.26699912011475e-06, "loss": 118.8406, "step": 58980 }, { "epoch": 0.4879844480291186, "grad_norm": 606.7252197265625, "learning_rate": 6.265634734733848e-06, "loss": 74.8637, "step": 58990 }, { "epoch": 0.4880671712784878, "grad_norm": 1299.558349609375, "learning_rate": 6.264270248652199e-06, "loss": 110.4696, "step": 59000 }, { "epoch": 0.48814989452785706, "grad_norm": 913.1611938476562, "learning_rate": 6.262905661978367e-06, "loss": 79.307, "step": 59010 }, { "epoch": 0.48823261777722626, "grad_norm": 590.417724609375, "learning_rate": 6.261540974820928e-06, "loss": 99.6395, "step": 59020 }, { "epoch": 0.4883153410265955, "grad_norm": 696.5634155273438, "learning_rate": 6.260176187288463e-06, "loss": 100.7072, "step": 59030 }, { "epoch": 0.48839806427596477, "grad_norm": 1761.085693359375, "learning_rate": 6.2588112994895636e-06, "loss": 151.4153, "step": 59040 }, { "epoch": 0.48848078752533397, "grad_norm": 1071.5557861328125, "learning_rate": 6.257446311532824e-06, "loss": 69.3153, "step": 59050 }, { "epoch": 0.4885635107747032, "grad_norm": 1153.7186279296875, "learning_rate": 6.256081223526854e-06, "loss": 93.4576, "step": 59060 }, { "epoch": 0.4886462340240725, "grad_norm": 1251.8411865234375, "learning_rate": 6.254716035580264e-06, "loss": 71.2112, "step": 59070 }, { "epoch": 0.4887289572734417, "grad_norm": 653.9671630859375, "learning_rate": 6.25335074780168e-06, "loss": 101.7776, "step": 59080 }, { "epoch": 0.48881168052281093, "grad_norm": 917.6962280273438, "learning_rate": 6.251985360299728e-06, "loss": 109.7955, "step": 59090 }, { "epoch": 0.4888944037721802, "grad_norm": 1049.5726318359375, "learning_rate": 6.250619873183046e-06, "loss": 97.9137, "step": 59100 }, { "epoch": 0.4889771270215494, "grad_norm": 960.5833129882812, "learning_rate": 6.249254286560281e-06, "loss": 115.5338, "step": 59110 }, { "epoch": 0.48905985027091864, "grad_norm": 1100.854248046875, "learning_rate": 6.247888600540084e-06, "loss": 102.4374, "step": 59120 }, { "epoch": 0.4891425735202879, "grad_norm": 858.7109985351562, "learning_rate": 6.246522815231121e-06, "loss": 86.8003, "step": 59130 }, { "epoch": 0.4892252967696571, "grad_norm": 758.7923583984375, "learning_rate": 6.245156930742057e-06, "loss": 99.9746, "step": 59140 }, { "epoch": 0.48930802001902635, "grad_norm": 957.5679931640625, "learning_rate": 6.24379094718157e-06, "loss": 105.2421, "step": 59150 }, { "epoch": 0.4893907432683956, "grad_norm": 932.5010986328125, "learning_rate": 6.2424248646583455e-06, "loss": 109.755, "step": 59160 }, { "epoch": 0.4894734665177648, "grad_norm": 1039.6802978515625, "learning_rate": 6.241058683281077e-06, "loss": 157.5874, "step": 59170 }, { "epoch": 0.48955618976713405, "grad_norm": 1249.9261474609375, "learning_rate": 6.239692403158465e-06, "loss": 80.0913, "step": 59180 }, { "epoch": 0.4896389130165033, "grad_norm": 769.3261108398438, "learning_rate": 6.238326024399217e-06, "loss": 116.0565, "step": 59190 }, { "epoch": 0.4897216362658725, "grad_norm": 3054.625, "learning_rate": 6.236959547112051e-06, "loss": 135.2796, "step": 59200 }, { "epoch": 0.48980435951524176, "grad_norm": 1218.1517333984375, "learning_rate": 6.235592971405691e-06, "loss": 96.0189, "step": 59210 }, { "epoch": 0.489887082764611, "grad_norm": 964.074462890625, "learning_rate": 6.234226297388869e-06, "loss": 135.8784, "step": 59220 }, { "epoch": 0.4899698060139802, "grad_norm": 822.4147338867188, "learning_rate": 6.232859525170324e-06, "loss": 74.8447, "step": 59230 }, { "epoch": 0.49005252926334947, "grad_norm": 1401.8765869140625, "learning_rate": 6.231492654858805e-06, "loss": 124.0554, "step": 59240 }, { "epoch": 0.4901352525127187, "grad_norm": 665.4353637695312, "learning_rate": 6.230125686563068e-06, "loss": 116.1569, "step": 59250 }, { "epoch": 0.4902179757620879, "grad_norm": 1295.380126953125, "learning_rate": 6.2287586203918745e-06, "loss": 114.117, "step": 59260 }, { "epoch": 0.4903006990114572, "grad_norm": 1130.935791015625, "learning_rate": 6.227391456453997e-06, "loss": 73.4887, "step": 59270 }, { "epoch": 0.49038342226082643, "grad_norm": 615.1046142578125, "learning_rate": 6.226024194858214e-06, "loss": 83.7547, "step": 59280 }, { "epoch": 0.49046614551019563, "grad_norm": 909.8365478515625, "learning_rate": 6.224656835713313e-06, "loss": 99.4262, "step": 59290 }, { "epoch": 0.4905488687595649, "grad_norm": 768.0503540039062, "learning_rate": 6.223289379128088e-06, "loss": 88.5417, "step": 59300 }, { "epoch": 0.49063159200893414, "grad_norm": 1546.40771484375, "learning_rate": 6.221921825211342e-06, "loss": 88.7849, "step": 59310 }, { "epoch": 0.49071431525830334, "grad_norm": 1119.7420654296875, "learning_rate": 6.220554174071884e-06, "loss": 216.5368, "step": 59320 }, { "epoch": 0.4907970385076726, "grad_norm": 738.0167846679688, "learning_rate": 6.219186425818531e-06, "loss": 111.826, "step": 59330 }, { "epoch": 0.49087976175704184, "grad_norm": 1079.2379150390625, "learning_rate": 6.217818580560111e-06, "loss": 138.3639, "step": 59340 }, { "epoch": 0.49096248500641104, "grad_norm": 455.43560791015625, "learning_rate": 6.216450638405454e-06, "loss": 106.5501, "step": 59350 }, { "epoch": 0.4910452082557803, "grad_norm": 1222.3670654296875, "learning_rate": 6.2150825994634025e-06, "loss": 76.1361, "step": 59360 }, { "epoch": 0.4911279315051495, "grad_norm": 2011.9967041015625, "learning_rate": 6.2137144638428045e-06, "loss": 113.7866, "step": 59370 }, { "epoch": 0.49121065475451875, "grad_norm": 1221.827880859375, "learning_rate": 6.21234623165252e-06, "loss": 79.5845, "step": 59380 }, { "epoch": 0.491293378003888, "grad_norm": 780.6744384765625, "learning_rate": 6.210977903001406e-06, "loss": 88.4106, "step": 59390 }, { "epoch": 0.4913761012532572, "grad_norm": 755.7965087890625, "learning_rate": 6.209609477998339e-06, "loss": 97.9078, "step": 59400 }, { "epoch": 0.49145882450262646, "grad_norm": 883.141357421875, "learning_rate": 6.2082409567521975e-06, "loss": 97.257, "step": 59410 }, { "epoch": 0.4915415477519957, "grad_norm": 1329.85107421875, "learning_rate": 6.206872339371867e-06, "loss": 101.6907, "step": 59420 }, { "epoch": 0.4916242710013649, "grad_norm": 2107.642578125, "learning_rate": 6.205503625966247e-06, "loss": 109.3981, "step": 59430 }, { "epoch": 0.49170699425073416, "grad_norm": 663.9820556640625, "learning_rate": 6.204134816644233e-06, "loss": 77.1192, "step": 59440 }, { "epoch": 0.4917897175001034, "grad_norm": 1082.100341796875, "learning_rate": 6.2027659115147375e-06, "loss": 109.6853, "step": 59450 }, { "epoch": 0.4918724407494726, "grad_norm": 734.315185546875, "learning_rate": 6.201396910686679e-06, "loss": 92.7889, "step": 59460 }, { "epoch": 0.49195516399884187, "grad_norm": 825.7645874023438, "learning_rate": 6.200027814268984e-06, "loss": 82.5341, "step": 59470 }, { "epoch": 0.4920378872482111, "grad_norm": 990.0794677734375, "learning_rate": 6.198658622370582e-06, "loss": 91.5214, "step": 59480 }, { "epoch": 0.4921206104975803, "grad_norm": 672.1451416015625, "learning_rate": 6.197289335100412e-06, "loss": 99.9061, "step": 59490 }, { "epoch": 0.4922033337469496, "grad_norm": 663.6240234375, "learning_rate": 6.195919952567426e-06, "loss": 84.1417, "step": 59500 }, { "epoch": 0.49228605699631883, "grad_norm": 1181.8443603515625, "learning_rate": 6.194550474880579e-06, "loss": 101.2219, "step": 59510 }, { "epoch": 0.49236878024568803, "grad_norm": 1052.6051025390625, "learning_rate": 6.193180902148833e-06, "loss": 101.8608, "step": 59520 }, { "epoch": 0.4924515034950573, "grad_norm": 467.2412109375, "learning_rate": 6.1918112344811575e-06, "loss": 64.4284, "step": 59530 }, { "epoch": 0.49253422674442654, "grad_norm": 756.0142822265625, "learning_rate": 6.190441471986533e-06, "loss": 93.9991, "step": 59540 }, { "epoch": 0.49261694999379574, "grad_norm": 650.9651489257812, "learning_rate": 6.18907161477394e-06, "loss": 134.6619, "step": 59550 }, { "epoch": 0.492699673243165, "grad_norm": 845.457275390625, "learning_rate": 6.187701662952381e-06, "loss": 71.9417, "step": 59560 }, { "epoch": 0.49278239649253425, "grad_norm": 1039.0081787109375, "learning_rate": 6.18633161663085e-06, "loss": 96.217, "step": 59570 }, { "epoch": 0.49286511974190345, "grad_norm": 1419.552734375, "learning_rate": 6.184961475918355e-06, "loss": 117.6555, "step": 59580 }, { "epoch": 0.4929478429912727, "grad_norm": 1182.2574462890625, "learning_rate": 6.183591240923914e-06, "loss": 116.7017, "step": 59590 }, { "epoch": 0.49303056624064195, "grad_norm": 1182.273681640625, "learning_rate": 6.182220911756551e-06, "loss": 96.5914, "step": 59600 }, { "epoch": 0.49311328949001115, "grad_norm": 1710.057861328125, "learning_rate": 6.1808504885252955e-06, "loss": 116.0724, "step": 59610 }, { "epoch": 0.4931960127393804, "grad_norm": 738.5501708984375, "learning_rate": 6.179479971339186e-06, "loss": 102.2499, "step": 59620 }, { "epoch": 0.49327873598874966, "grad_norm": 1123.935791015625, "learning_rate": 6.178109360307267e-06, "loss": 98.9781, "step": 59630 }, { "epoch": 0.49336145923811886, "grad_norm": 1302.947998046875, "learning_rate": 6.176738655538594e-06, "loss": 86.7837, "step": 59640 }, { "epoch": 0.4934441824874881, "grad_norm": 1149.8046875, "learning_rate": 6.175367857142227e-06, "loss": 101.772, "step": 59650 }, { "epoch": 0.49352690573685737, "grad_norm": 313.6778564453125, "learning_rate": 6.173996965227234e-06, "loss": 67.0711, "step": 59660 }, { "epoch": 0.49360962898622657, "grad_norm": 992.2843627929688, "learning_rate": 6.17262597990269e-06, "loss": 122.4615, "step": 59670 }, { "epoch": 0.4936923522355958, "grad_norm": 544.5889282226562, "learning_rate": 6.171254901277678e-06, "loss": 118.6765, "step": 59680 }, { "epoch": 0.4937750754849651, "grad_norm": 760.4612426757812, "learning_rate": 6.169883729461289e-06, "loss": 73.9667, "step": 59690 }, { "epoch": 0.4938577987343343, "grad_norm": 879.9442749023438, "learning_rate": 6.16851246456262e-06, "loss": 121.3691, "step": 59700 }, { "epoch": 0.49394052198370353, "grad_norm": 838.228271484375, "learning_rate": 6.167141106690778e-06, "loss": 81.8349, "step": 59710 }, { "epoch": 0.4940232452330727, "grad_norm": 600.8211059570312, "learning_rate": 6.1657696559548755e-06, "loss": 82.6974, "step": 59720 }, { "epoch": 0.494105968482442, "grad_norm": 1331.113525390625, "learning_rate": 6.16439811246403e-06, "loss": 91.3955, "step": 59730 }, { "epoch": 0.49418869173181124, "grad_norm": 1298.560791015625, "learning_rate": 6.163026476327371e-06, "loss": 127.3068, "step": 59740 }, { "epoch": 0.49427141498118043, "grad_norm": 1139.0467529296875, "learning_rate": 6.161654747654033e-06, "loss": 103.0033, "step": 59750 }, { "epoch": 0.4943541382305497, "grad_norm": 740.8123779296875, "learning_rate": 6.1602829265531585e-06, "loss": 111.8351, "step": 59760 }, { "epoch": 0.49443686147991894, "grad_norm": 811.9702758789062, "learning_rate": 6.158911013133896e-06, "loss": 106.3484, "step": 59770 }, { "epoch": 0.49451958472928814, "grad_norm": 751.2868041992188, "learning_rate": 6.157539007505402e-06, "loss": 81.8818, "step": 59780 }, { "epoch": 0.4946023079786574, "grad_norm": 596.0902099609375, "learning_rate": 6.156166909776842e-06, "loss": 141.5883, "step": 59790 }, { "epoch": 0.49468503122802665, "grad_norm": 669.7060546875, "learning_rate": 6.154794720057388e-06, "loss": 92.8498, "step": 59800 }, { "epoch": 0.49476775447739585, "grad_norm": 769.1928100585938, "learning_rate": 6.153422438456218e-06, "loss": 111.0396, "step": 59810 }, { "epoch": 0.4948504777267651, "grad_norm": 1065.379638671875, "learning_rate": 6.1520500650825175e-06, "loss": 109.4096, "step": 59820 }, { "epoch": 0.49493320097613436, "grad_norm": 1148.633056640625, "learning_rate": 6.150677600045479e-06, "loss": 107.9428, "step": 59830 }, { "epoch": 0.49501592422550356, "grad_norm": 530.499267578125, "learning_rate": 6.1493050434543065e-06, "loss": 80.3538, "step": 59840 }, { "epoch": 0.4950986474748728, "grad_norm": 888.7738647460938, "learning_rate": 6.1479323954182055e-06, "loss": 82.7131, "step": 59850 }, { "epoch": 0.49518137072424206, "grad_norm": 1272.56884765625, "learning_rate": 6.146559656046394e-06, "loss": 115.8495, "step": 59860 }, { "epoch": 0.49526409397361126, "grad_norm": 1251.605712890625, "learning_rate": 6.1451868254480914e-06, "loss": 103.9669, "step": 59870 }, { "epoch": 0.4953468172229805, "grad_norm": 1469.060546875, "learning_rate": 6.143813903732527e-06, "loss": 98.5221, "step": 59880 }, { "epoch": 0.49542954047234977, "grad_norm": 613.4310913085938, "learning_rate": 6.142440891008941e-06, "loss": 95.7254, "step": 59890 }, { "epoch": 0.49551226372171897, "grad_norm": 1487.4815673828125, "learning_rate": 6.141067787386579e-06, "loss": 134.6069, "step": 59900 }, { "epoch": 0.4955949869710882, "grad_norm": 1083.0224609375, "learning_rate": 6.139694592974687e-06, "loss": 103.1761, "step": 59910 }, { "epoch": 0.4956777102204575, "grad_norm": 1133.168701171875, "learning_rate": 6.1383213078825275e-06, "loss": 111.6922, "step": 59920 }, { "epoch": 0.4957604334698267, "grad_norm": 1490.791015625, "learning_rate": 6.136947932219365e-06, "loss": 102.0797, "step": 59930 }, { "epoch": 0.49584315671919593, "grad_norm": 717.7562866210938, "learning_rate": 6.135574466094475e-06, "loss": 103.1788, "step": 59940 }, { "epoch": 0.4959258799685652, "grad_norm": 707.7864379882812, "learning_rate": 6.134200909617135e-06, "loss": 84.5769, "step": 59950 }, { "epoch": 0.4960086032179344, "grad_norm": 607.8535766601562, "learning_rate": 6.132827262896634e-06, "loss": 109.7207, "step": 59960 }, { "epoch": 0.49609132646730364, "grad_norm": 575.4254150390625, "learning_rate": 6.131453526042267e-06, "loss": 94.0905, "step": 59970 }, { "epoch": 0.4961740497166729, "grad_norm": 1168.943603515625, "learning_rate": 6.130079699163335e-06, "loss": 123.4409, "step": 59980 }, { "epoch": 0.4962567729660421, "grad_norm": 806.4592895507812, "learning_rate": 6.128705782369149e-06, "loss": 126.4985, "step": 59990 }, { "epoch": 0.49633949621541135, "grad_norm": 994.6439208984375, "learning_rate": 6.127331775769023e-06, "loss": 81.3202, "step": 60000 }, { "epoch": 0.4964222194647806, "grad_norm": 764.6784057617188, "learning_rate": 6.125957679472282e-06, "loss": 98.2079, "step": 60010 }, { "epoch": 0.4965049427141498, "grad_norm": 963.4498901367188, "learning_rate": 6.124583493588254e-06, "loss": 107.3761, "step": 60020 }, { "epoch": 0.49658766596351905, "grad_norm": 763.531494140625, "learning_rate": 6.123209218226282e-06, "loss": 86.249, "step": 60030 }, { "epoch": 0.4966703892128883, "grad_norm": 1130.901611328125, "learning_rate": 6.121834853495704e-06, "loss": 80.7327, "step": 60040 }, { "epoch": 0.4967531124622575, "grad_norm": 830.3397827148438, "learning_rate": 6.120460399505876e-06, "loss": 79.5612, "step": 60050 }, { "epoch": 0.49683583571162676, "grad_norm": 1038.9749755859375, "learning_rate": 6.119085856366158e-06, "loss": 104.2222, "step": 60060 }, { "epoch": 0.496918558960996, "grad_norm": 1272.2034912109375, "learning_rate": 6.117711224185913e-06, "loss": 95.4032, "step": 60070 }, { "epoch": 0.4970012822103652, "grad_norm": 678.36083984375, "learning_rate": 6.116336503074516e-06, "loss": 111.6325, "step": 60080 }, { "epoch": 0.49708400545973447, "grad_norm": 902.4628295898438, "learning_rate": 6.114961693141346e-06, "loss": 109.0453, "step": 60090 }, { "epoch": 0.49716672870910367, "grad_norm": 1161.1533203125, "learning_rate": 6.113586794495792e-06, "loss": 102.0886, "step": 60100 }, { "epoch": 0.4972494519584729, "grad_norm": 883.01025390625, "learning_rate": 6.112211807247246e-06, "loss": 79.8961, "step": 60110 }, { "epoch": 0.4973321752078422, "grad_norm": 560.7022094726562, "learning_rate": 6.110836731505112e-06, "loss": 104.4858, "step": 60120 }, { "epoch": 0.4974148984572114, "grad_norm": 563.2030639648438, "learning_rate": 6.109461567378796e-06, "loss": 76.5894, "step": 60130 }, { "epoch": 0.49749762170658063, "grad_norm": 567.1194458007812, "learning_rate": 6.108086314977717e-06, "loss": 74.5852, "step": 60140 }, { "epoch": 0.4975803449559499, "grad_norm": 1001.0436401367188, "learning_rate": 6.106710974411294e-06, "loss": 89.3131, "step": 60150 }, { "epoch": 0.4976630682053191, "grad_norm": 1134.8504638671875, "learning_rate": 6.105335545788957e-06, "loss": 92.9293, "step": 60160 }, { "epoch": 0.49774579145468834, "grad_norm": 1836.4036865234375, "learning_rate": 6.103960029220145e-06, "loss": 96.6835, "step": 60170 }, { "epoch": 0.4978285147040576, "grad_norm": 884.6735229492188, "learning_rate": 6.102584424814299e-06, "loss": 96.8814, "step": 60180 }, { "epoch": 0.4979112379534268, "grad_norm": 1286.9527587890625, "learning_rate": 6.101208732680872e-06, "loss": 84.8719, "step": 60190 }, { "epoch": 0.49799396120279604, "grad_norm": 1096.192138671875, "learning_rate": 6.09983295292932e-06, "loss": 133.576, "step": 60200 }, { "epoch": 0.4980766844521653, "grad_norm": 396.5540771484375, "learning_rate": 6.0984570856691046e-06, "loss": 71.0101, "step": 60210 }, { "epoch": 0.4981594077015345, "grad_norm": 1324.0350341796875, "learning_rate": 6.097081131009703e-06, "loss": 84.5404, "step": 60220 }, { "epoch": 0.49824213095090375, "grad_norm": 1569.8370361328125, "learning_rate": 6.095705089060589e-06, "loss": 131.2618, "step": 60230 }, { "epoch": 0.498324854200273, "grad_norm": 452.867919921875, "learning_rate": 6.094328959931252e-06, "loss": 98.2553, "step": 60240 }, { "epoch": 0.4984075774496422, "grad_norm": 754.5675659179688, "learning_rate": 6.092952743731179e-06, "loss": 109.5702, "step": 60250 }, { "epoch": 0.49849030069901146, "grad_norm": 1038.2977294921875, "learning_rate": 6.091576440569873e-06, "loss": 102.3048, "step": 60260 }, { "epoch": 0.4985730239483807, "grad_norm": 1505.541748046875, "learning_rate": 6.09020005055684e-06, "loss": 93.0384, "step": 60270 }, { "epoch": 0.4986557471977499, "grad_norm": 714.1956787109375, "learning_rate": 6.088823573801592e-06, "loss": 89.4964, "step": 60280 }, { "epoch": 0.49873847044711916, "grad_norm": 1760.59033203125, "learning_rate": 6.087447010413651e-06, "loss": 123.4378, "step": 60290 }, { "epoch": 0.4988211936964884, "grad_norm": 647.3226318359375, "learning_rate": 6.08607036050254e-06, "loss": 97.7962, "step": 60300 }, { "epoch": 0.4989039169458576, "grad_norm": 670.925048828125, "learning_rate": 6.084693624177794e-06, "loss": 90.9299, "step": 60310 }, { "epoch": 0.49898664019522687, "grad_norm": 861.1676635742188, "learning_rate": 6.083316801548956e-06, "loss": 96.199, "step": 60320 }, { "epoch": 0.4990693634445961, "grad_norm": 844.1892700195312, "learning_rate": 6.081939892725572e-06, "loss": 89.0784, "step": 60330 }, { "epoch": 0.4991520866939653, "grad_norm": 1057.48876953125, "learning_rate": 6.080562897817196e-06, "loss": 80.8512, "step": 60340 }, { "epoch": 0.4992348099433346, "grad_norm": 547.9786987304688, "learning_rate": 6.079185816933388e-06, "loss": 123.7075, "step": 60350 }, { "epoch": 0.49931753319270383, "grad_norm": 813.2322998046875, "learning_rate": 6.077808650183718e-06, "loss": 75.0818, "step": 60360 }, { "epoch": 0.49940025644207303, "grad_norm": 1088.018310546875, "learning_rate": 6.076431397677762e-06, "loss": 99.3224, "step": 60370 }, { "epoch": 0.4994829796914423, "grad_norm": 443.6618347167969, "learning_rate": 6.0750540595250986e-06, "loss": 92.7872, "step": 60380 }, { "epoch": 0.49956570294081154, "grad_norm": 1097.77099609375, "learning_rate": 6.073676635835317e-06, "loss": 101.5357, "step": 60390 }, { "epoch": 0.49964842619018074, "grad_norm": 809.2193603515625, "learning_rate": 6.072299126718012e-06, "loss": 102.8223, "step": 60400 }, { "epoch": 0.49973114943955, "grad_norm": 1209.04296875, "learning_rate": 6.070921532282788e-06, "loss": 90.7184, "step": 60410 }, { "epoch": 0.49981387268891925, "grad_norm": 483.89910888671875, "learning_rate": 6.0695438526392536e-06, "loss": 90.6916, "step": 60420 }, { "epoch": 0.49989659593828845, "grad_norm": 939.6087036132812, "learning_rate": 6.068166087897022e-06, "loss": 95.3435, "step": 60430 }, { "epoch": 0.4999793191876577, "grad_norm": 1028.2034912109375, "learning_rate": 6.066788238165717e-06, "loss": 122.2318, "step": 60440 }, { "epoch": 0.5000620424370269, "grad_norm": 871.5949096679688, "learning_rate": 6.0654103035549686e-06, "loss": 107.04, "step": 60450 }, { "epoch": 0.5001447656863962, "grad_norm": 631.7724609375, "learning_rate": 6.064032284174411e-06, "loss": 107.0457, "step": 60460 }, { "epoch": 0.5002274889357654, "grad_norm": 1057.8795166015625, "learning_rate": 6.062654180133689e-06, "loss": 95.4936, "step": 60470 }, { "epoch": 0.5003102121851346, "grad_norm": 625.803466796875, "learning_rate": 6.06127599154245e-06, "loss": 112.727, "step": 60480 }, { "epoch": 0.5003929354345039, "grad_norm": 957.1911010742188, "learning_rate": 6.059897718510351e-06, "loss": 141.4194, "step": 60490 }, { "epoch": 0.5004756586838731, "grad_norm": 1082.177734375, "learning_rate": 6.058519361147055e-06, "loss": 106.0662, "step": 60500 }, { "epoch": 0.5005583819332423, "grad_norm": 961.278076171875, "learning_rate": 6.057140919562231e-06, "loss": 92.1514, "step": 60510 }, { "epoch": 0.5006411051826116, "grad_norm": 1194.2098388671875, "learning_rate": 6.055762393865555e-06, "loss": 114.0582, "step": 60520 }, { "epoch": 0.5007238284319808, "grad_norm": 2295.308349609375, "learning_rate": 6.054383784166712e-06, "loss": 122.28, "step": 60530 }, { "epoch": 0.50080655168135, "grad_norm": 950.94482421875, "learning_rate": 6.05300509057539e-06, "loss": 65.6176, "step": 60540 }, { "epoch": 0.5008892749307193, "grad_norm": 575.2659301757812, "learning_rate": 6.051626313201285e-06, "loss": 75.2322, "step": 60550 }, { "epoch": 0.5009719981800885, "grad_norm": 955.7214965820312, "learning_rate": 6.0502474521541014e-06, "loss": 78.4364, "step": 60560 }, { "epoch": 0.5010547214294577, "grad_norm": 1058.46240234375, "learning_rate": 6.048868507543547e-06, "loss": 76.487, "step": 60570 }, { "epoch": 0.501137444678827, "grad_norm": 1243.02880859375, "learning_rate": 6.047489479479339e-06, "loss": 94.1711, "step": 60580 }, { "epoch": 0.5012201679281962, "grad_norm": 539.5478515625, "learning_rate": 6.046110368071201e-06, "loss": 80.0069, "step": 60590 }, { "epoch": 0.5013028911775654, "grad_norm": 1104.5489501953125, "learning_rate": 6.044731173428862e-06, "loss": 109.4119, "step": 60600 }, { "epoch": 0.5013856144269347, "grad_norm": 547.853515625, "learning_rate": 6.043351895662059e-06, "loss": 74.3687, "step": 60610 }, { "epoch": 0.5014683376763039, "grad_norm": 815.2512817382812, "learning_rate": 6.041972534880533e-06, "loss": 98.8586, "step": 60620 }, { "epoch": 0.5015510609256731, "grad_norm": 934.5771484375, "learning_rate": 6.040593091194035e-06, "loss": 113.7061, "step": 60630 }, { "epoch": 0.5016337841750423, "grad_norm": 980.511962890625, "learning_rate": 6.039213564712319e-06, "loss": 77.5835, "step": 60640 }, { "epoch": 0.5017165074244117, "grad_norm": 875.10693359375, "learning_rate": 6.03783395554515e-06, "loss": 90.5567, "step": 60650 }, { "epoch": 0.5017992306737808, "grad_norm": 564.4214477539062, "learning_rate": 6.036454263802297e-06, "loss": 99.0541, "step": 60660 }, { "epoch": 0.50188195392315, "grad_norm": 859.4292602539062, "learning_rate": 6.035074489593536e-06, "loss": 81.3836, "step": 60670 }, { "epoch": 0.5019646771725194, "grad_norm": 1366.642333984375, "learning_rate": 6.033694633028644e-06, "loss": 89.8926, "step": 60680 }, { "epoch": 0.5020474004218886, "grad_norm": 707.115478515625, "learning_rate": 6.032314694217416e-06, "loss": 119.7774, "step": 60690 }, { "epoch": 0.5021301236712578, "grad_norm": 509.8358154296875, "learning_rate": 6.030934673269646e-06, "loss": 76.9633, "step": 60700 }, { "epoch": 0.5022128469206271, "grad_norm": 1127.6051025390625, "learning_rate": 6.029554570295135e-06, "loss": 98.3396, "step": 60710 }, { "epoch": 0.5022955701699963, "grad_norm": 489.0888366699219, "learning_rate": 6.028174385403693e-06, "loss": 74.8344, "step": 60720 }, { "epoch": 0.5023782934193655, "grad_norm": 563.7764282226562, "learning_rate": 6.026794118705133e-06, "loss": 123.1201, "step": 60730 }, { "epoch": 0.5024610166687348, "grad_norm": 922.4900512695312, "learning_rate": 6.025413770309278e-06, "loss": 105.9961, "step": 60740 }, { "epoch": 0.502543739918104, "grad_norm": 711.1076049804688, "learning_rate": 6.024033340325954e-06, "loss": 138.0339, "step": 60750 }, { "epoch": 0.5026264631674732, "grad_norm": 1032.06787109375, "learning_rate": 6.022652828864999e-06, "loss": 129.3473, "step": 60760 }, { "epoch": 0.5027091864168425, "grad_norm": 987.8187255859375, "learning_rate": 6.0212722360362496e-06, "loss": 98.7341, "step": 60770 }, { "epoch": 0.5027919096662117, "grad_norm": 922.1596069335938, "learning_rate": 6.019891561949554e-06, "loss": 113.0199, "step": 60780 }, { "epoch": 0.5028746329155809, "grad_norm": 966.7632446289062, "learning_rate": 6.01851080671477e-06, "loss": 95.6403, "step": 60790 }, { "epoch": 0.5029573561649502, "grad_norm": 626.9385986328125, "learning_rate": 6.017129970441756e-06, "loss": 85.6963, "step": 60800 }, { "epoch": 0.5030400794143194, "grad_norm": 1487.6356201171875, "learning_rate": 6.015749053240378e-06, "loss": 97.1684, "step": 60810 }, { "epoch": 0.5031228026636886, "grad_norm": 789.6949462890625, "learning_rate": 6.0143680552205075e-06, "loss": 103.032, "step": 60820 }, { "epoch": 0.5032055259130579, "grad_norm": 737.3887329101562, "learning_rate": 6.012986976492025e-06, "loss": 86.7182, "step": 60830 }, { "epoch": 0.5032882491624271, "grad_norm": 600.4498291015625, "learning_rate": 6.011605817164822e-06, "loss": 94.8057, "step": 60840 }, { "epoch": 0.5033709724117963, "grad_norm": 593.8978271484375, "learning_rate": 6.0102245773487855e-06, "loss": 107.6144, "step": 60850 }, { "epoch": 0.5034536956611656, "grad_norm": 1065.4730224609375, "learning_rate": 6.008843257153815e-06, "loss": 90.4793, "step": 60860 }, { "epoch": 0.5035364189105348, "grad_norm": 824.6838989257812, "learning_rate": 6.007461856689815e-06, "loss": 75.2064, "step": 60870 }, { "epoch": 0.503619142159904, "grad_norm": 450.9040832519531, "learning_rate": 6.0060803760667e-06, "loss": 88.3228, "step": 60880 }, { "epoch": 0.5037018654092733, "grad_norm": 829.0480346679688, "learning_rate": 6.004698815394389e-06, "loss": 124.8127, "step": 60890 }, { "epoch": 0.5037845886586425, "grad_norm": 1101.244384765625, "learning_rate": 6.003317174782801e-06, "loss": 77.5046, "step": 60900 }, { "epoch": 0.5038673119080117, "grad_norm": 1373.401611328125, "learning_rate": 6.001935454341872e-06, "loss": 92.4626, "step": 60910 }, { "epoch": 0.503950035157381, "grad_norm": 489.91644287109375, "learning_rate": 6.000553654181536e-06, "loss": 92.368, "step": 60920 }, { "epoch": 0.5040327584067502, "grad_norm": 813.798583984375, "learning_rate": 5.999171774411737e-06, "loss": 96.8709, "step": 60930 }, { "epoch": 0.5041154816561194, "grad_norm": 1986.8172607421875, "learning_rate": 5.997789815142427e-06, "loss": 83.3268, "step": 60940 }, { "epoch": 0.5041982049054887, "grad_norm": 721.0343017578125, "learning_rate": 5.99640777648356e-06, "loss": 111.3407, "step": 60950 }, { "epoch": 0.5042809281548579, "grad_norm": 1814.00146484375, "learning_rate": 5.9950256585450995e-06, "loss": 101.1115, "step": 60960 }, { "epoch": 0.5043636514042271, "grad_norm": 1539.85009765625, "learning_rate": 5.993643461437013e-06, "loss": 109.2939, "step": 60970 }, { "epoch": 0.5044463746535964, "grad_norm": 1455.0811767578125, "learning_rate": 5.992261185269278e-06, "loss": 116.5055, "step": 60980 }, { "epoch": 0.5045290979029656, "grad_norm": 673.0990600585938, "learning_rate": 5.990878830151873e-06, "loss": 84.7869, "step": 60990 }, { "epoch": 0.5046118211523348, "grad_norm": 990.6354370117188, "learning_rate": 5.989496396194787e-06, "loss": 117.0743, "step": 61000 }, { "epoch": 0.5046945444017041, "grad_norm": 659.1134033203125, "learning_rate": 5.988113883508016e-06, "loss": 85.4938, "step": 61010 }, { "epoch": 0.5047772676510733, "grad_norm": 1276.8919677734375, "learning_rate": 5.986731292201555e-06, "loss": 111.5496, "step": 61020 }, { "epoch": 0.5048599909004425, "grad_norm": 1091.1859130859375, "learning_rate": 5.985348622385415e-06, "loss": 139.6149, "step": 61030 }, { "epoch": 0.5049427141498118, "grad_norm": 1123.3651123046875, "learning_rate": 5.9839658741696085e-06, "loss": 60.2066, "step": 61040 }, { "epoch": 0.505025437399181, "grad_norm": 405.6517028808594, "learning_rate": 5.982583047664151e-06, "loss": 89.4623, "step": 61050 }, { "epoch": 0.5051081606485502, "grad_norm": 489.998046875, "learning_rate": 5.981200142979071e-06, "loss": 70.606, "step": 61060 }, { "epoch": 0.5051908838979196, "grad_norm": 611.7427368164062, "learning_rate": 5.9798171602244e-06, "loss": 90.265, "step": 61070 }, { "epoch": 0.5052736071472888, "grad_norm": 500.9188537597656, "learning_rate": 5.978434099510172e-06, "loss": 102.3768, "step": 61080 }, { "epoch": 0.505356330396658, "grad_norm": 939.5604858398438, "learning_rate": 5.977050960946433e-06, "loss": 95.7826, "step": 61090 }, { "epoch": 0.5054390536460273, "grad_norm": 1079.417724609375, "learning_rate": 5.975667744643235e-06, "loss": 57.3004, "step": 61100 }, { "epoch": 0.5055217768953965, "grad_norm": 801.0790405273438, "learning_rate": 5.974284450710631e-06, "loss": 100.4074, "step": 61110 }, { "epoch": 0.5056045001447657, "grad_norm": 1189.5526123046875, "learning_rate": 5.972901079258685e-06, "loss": 89.206, "step": 61120 }, { "epoch": 0.505687223394135, "grad_norm": 544.3245239257812, "learning_rate": 5.971517630397465e-06, "loss": 87.3128, "step": 61130 }, { "epoch": 0.5057699466435042, "grad_norm": 804.9583129882812, "learning_rate": 5.970134104237046e-06, "loss": 118.5412, "step": 61140 }, { "epoch": 0.5058526698928734, "grad_norm": 878.0859985351562, "learning_rate": 5.96875050088751e-06, "loss": 151.7707, "step": 61150 }, { "epoch": 0.5059353931422427, "grad_norm": 555.1686401367188, "learning_rate": 5.9673668204589396e-06, "loss": 80.5509, "step": 61160 }, { "epoch": 0.5060181163916119, "grad_norm": 887.9091186523438, "learning_rate": 5.965983063061432e-06, "loss": 97.7346, "step": 61170 }, { "epoch": 0.5061008396409811, "grad_norm": 2689.885498046875, "learning_rate": 5.964599228805087e-06, "loss": 109.0754, "step": 61180 }, { "epoch": 0.5061835628903504, "grad_norm": 851.7760009765625, "learning_rate": 5.963215317800008e-06, "loss": 111.0486, "step": 61190 }, { "epoch": 0.5062662861397196, "grad_norm": 912.8546142578125, "learning_rate": 5.961831330156306e-06, "loss": 131.0121, "step": 61200 }, { "epoch": 0.5063490093890888, "grad_norm": 641.365478515625, "learning_rate": 5.960447265984098e-06, "loss": 88.7224, "step": 61210 }, { "epoch": 0.5064317326384581, "grad_norm": 1167.5374755859375, "learning_rate": 5.95906312539351e-06, "loss": 90.7471, "step": 61220 }, { "epoch": 0.5065144558878273, "grad_norm": 808.2752685546875, "learning_rate": 5.9576789084946705e-06, "loss": 74.6994, "step": 61230 }, { "epoch": 0.5065971791371965, "grad_norm": 1379.4791259765625, "learning_rate": 5.956294615397716e-06, "loss": 105.455, "step": 61240 }, { "epoch": 0.5066799023865658, "grad_norm": 1430.2303466796875, "learning_rate": 5.954910246212787e-06, "loss": 126.3884, "step": 61250 }, { "epoch": 0.506762625635935, "grad_norm": 1149.9755859375, "learning_rate": 5.953525801050032e-06, "loss": 83.9256, "step": 61260 }, { "epoch": 0.5068453488853042, "grad_norm": 558.4307861328125, "learning_rate": 5.952141280019605e-06, "loss": 96.2179, "step": 61270 }, { "epoch": 0.5069280721346735, "grad_norm": 742.656494140625, "learning_rate": 5.950756683231667e-06, "loss": 74.2085, "step": 61280 }, { "epoch": 0.5070107953840427, "grad_norm": 596.1998291015625, "learning_rate": 5.949372010796384e-06, "loss": 52.8268, "step": 61290 }, { "epoch": 0.5070935186334119, "grad_norm": 881.3981323242188, "learning_rate": 5.947987262823924e-06, "loss": 107.5389, "step": 61300 }, { "epoch": 0.5071762418827812, "grad_norm": 675.0526123046875, "learning_rate": 5.94660243942447e-06, "loss": 123.2516, "step": 61310 }, { "epoch": 0.5072589651321504, "grad_norm": 889.6369018554688, "learning_rate": 5.945217540708206e-06, "loss": 86.9526, "step": 61320 }, { "epoch": 0.5073416883815196, "grad_norm": 736.0260620117188, "learning_rate": 5.9438325667853185e-06, "loss": 94.7347, "step": 61330 }, { "epoch": 0.5074244116308889, "grad_norm": 899.1821899414062, "learning_rate": 5.942447517766005e-06, "loss": 106.1979, "step": 61340 }, { "epoch": 0.5075071348802581, "grad_norm": 760.8129272460938, "learning_rate": 5.941062393760467e-06, "loss": 94.1305, "step": 61350 }, { "epoch": 0.5075898581296273, "grad_norm": 434.6575622558594, "learning_rate": 5.939677194878915e-06, "loss": 140.0839, "step": 61360 }, { "epoch": 0.5076725813789965, "grad_norm": 577.9569091796875, "learning_rate": 5.93829192123156e-06, "loss": 74.5071, "step": 61370 }, { "epoch": 0.5077553046283658, "grad_norm": 694.65625, "learning_rate": 5.936906572928625e-06, "loss": 120.0574, "step": 61380 }, { "epoch": 0.507838027877735, "grad_norm": 772.7642211914062, "learning_rate": 5.935521150080331e-06, "loss": 75.3494, "step": 61390 }, { "epoch": 0.5079207511271042, "grad_norm": 790.8641967773438, "learning_rate": 5.934135652796914e-06, "loss": 87.367, "step": 61400 }, { "epoch": 0.5080034743764735, "grad_norm": 813.5361938476562, "learning_rate": 5.9327500811886095e-06, "loss": 83.0071, "step": 61410 }, { "epoch": 0.5080861976258427, "grad_norm": 437.74139404296875, "learning_rate": 5.931364435365663e-06, "loss": 115.0982, "step": 61420 }, { "epoch": 0.5081689208752119, "grad_norm": 890.75927734375, "learning_rate": 5.929978715438322e-06, "loss": 75.1045, "step": 61430 }, { "epoch": 0.5082516441245812, "grad_norm": 861.6392822265625, "learning_rate": 5.928592921516843e-06, "loss": 87.0132, "step": 61440 }, { "epoch": 0.5083343673739504, "grad_norm": 401.1084289550781, "learning_rate": 5.9272070537114855e-06, "loss": 111.1549, "step": 61450 }, { "epoch": 0.5084170906233196, "grad_norm": 886.0931396484375, "learning_rate": 5.92582111213252e-06, "loss": 130.5894, "step": 61460 }, { "epoch": 0.5084998138726889, "grad_norm": 573.5504150390625, "learning_rate": 5.924435096890216e-06, "loss": 96.3187, "step": 61470 }, { "epoch": 0.5085825371220581, "grad_norm": 915.1154174804688, "learning_rate": 5.923049008094855e-06, "loss": 102.2801, "step": 61480 }, { "epoch": 0.5086652603714273, "grad_norm": 789.848876953125, "learning_rate": 5.921662845856719e-06, "loss": 93.1844, "step": 61490 }, { "epoch": 0.5087479836207967, "grad_norm": 1229.4134521484375, "learning_rate": 5.920276610286102e-06, "loss": 119.4875, "step": 61500 }, { "epoch": 0.5088307068701658, "grad_norm": 1035.0843505859375, "learning_rate": 5.918890301493299e-06, "loss": 95.7163, "step": 61510 }, { "epoch": 0.508913430119535, "grad_norm": 880.9725952148438, "learning_rate": 5.91750391958861e-06, "loss": 73.2856, "step": 61520 }, { "epoch": 0.5089961533689044, "grad_norm": 646.2332763671875, "learning_rate": 5.916117464682346e-06, "loss": 106.2007, "step": 61530 }, { "epoch": 0.5090788766182736, "grad_norm": 485.7245178222656, "learning_rate": 5.914730936884819e-06, "loss": 112.5809, "step": 61540 }, { "epoch": 0.5091615998676428, "grad_norm": 1304.14794921875, "learning_rate": 5.91334433630635e-06, "loss": 124.1074, "step": 61550 }, { "epoch": 0.5092443231170121, "grad_norm": 911.8284912109375, "learning_rate": 5.911957663057264e-06, "loss": 87.838, "step": 61560 }, { "epoch": 0.5093270463663813, "grad_norm": 863.0823364257812, "learning_rate": 5.910570917247892e-06, "loss": 79.3177, "step": 61570 }, { "epoch": 0.5094097696157505, "grad_norm": 518.0543823242188, "learning_rate": 5.909184098988571e-06, "loss": 97.5815, "step": 61580 }, { "epoch": 0.5094924928651198, "grad_norm": 641.208984375, "learning_rate": 5.907797208389644e-06, "loss": 112.5585, "step": 61590 }, { "epoch": 0.509575216114489, "grad_norm": 644.5621337890625, "learning_rate": 5.906410245561459e-06, "loss": 80.3052, "step": 61600 }, { "epoch": 0.5096579393638582, "grad_norm": 1210.9039306640625, "learning_rate": 5.90502321061437e-06, "loss": 103.0409, "step": 61610 }, { "epoch": 0.5097406626132275, "grad_norm": 1110.852294921875, "learning_rate": 5.90363610365874e-06, "loss": 120.3473, "step": 61620 }, { "epoch": 0.5098233858625967, "grad_norm": 892.0844116210938, "learning_rate": 5.9022489248049295e-06, "loss": 101.4571, "step": 61630 }, { "epoch": 0.5099061091119659, "grad_norm": 456.140625, "learning_rate": 5.900861674163314e-06, "loss": 74.4327, "step": 61640 }, { "epoch": 0.5099888323613352, "grad_norm": 755.904541015625, "learning_rate": 5.89947435184427e-06, "loss": 90.8847, "step": 61650 }, { "epoch": 0.5100715556107044, "grad_norm": 1451.006591796875, "learning_rate": 5.89808695795818e-06, "loss": 101.8353, "step": 61660 }, { "epoch": 0.5101542788600736, "grad_norm": 1081.45263671875, "learning_rate": 5.896699492615432e-06, "loss": 86.6635, "step": 61670 }, { "epoch": 0.5102370021094429, "grad_norm": 497.5079040527344, "learning_rate": 5.895311955926419e-06, "loss": 83.6076, "step": 61680 }, { "epoch": 0.5103197253588121, "grad_norm": 643.5697631835938, "learning_rate": 5.893924348001544e-06, "loss": 97.4833, "step": 61690 }, { "epoch": 0.5104024486081813, "grad_norm": 956.6340942382812, "learning_rate": 5.8925366689512124e-06, "loss": 103.3445, "step": 61700 }, { "epoch": 0.5104851718575506, "grad_norm": 665.5700073242188, "learning_rate": 5.891148918885834e-06, "loss": 65.6439, "step": 61710 }, { "epoch": 0.5105678951069198, "grad_norm": 1049.6845703125, "learning_rate": 5.8897610979158245e-06, "loss": 89.6039, "step": 61720 }, { "epoch": 0.510650618356289, "grad_norm": 814.806396484375, "learning_rate": 5.888373206151608e-06, "loss": 102.2241, "step": 61730 }, { "epoch": 0.5107333416056583, "grad_norm": 841.8108520507812, "learning_rate": 5.886985243703612e-06, "loss": 103.3497, "step": 61740 }, { "epoch": 0.5108160648550275, "grad_norm": 488.0580749511719, "learning_rate": 5.885597210682273e-06, "loss": 117.5541, "step": 61750 }, { "epoch": 0.5108987881043967, "grad_norm": 780.1454467773438, "learning_rate": 5.884209107198027e-06, "loss": 118.4064, "step": 61760 }, { "epoch": 0.510981511353766, "grad_norm": 1028.7705078125, "learning_rate": 5.882820933361321e-06, "loss": 127.9731, "step": 61770 }, { "epoch": 0.5110642346031352, "grad_norm": 684.5718383789062, "learning_rate": 5.881432689282604e-06, "loss": 121.3453, "step": 61780 }, { "epoch": 0.5111469578525044, "grad_norm": 910.465087890625, "learning_rate": 5.880044375072333e-06, "loss": 135.5772, "step": 61790 }, { "epoch": 0.5112296811018737, "grad_norm": 1077.9769287109375, "learning_rate": 5.8786559908409715e-06, "loss": 110.6625, "step": 61800 }, { "epoch": 0.5113124043512429, "grad_norm": 1104.657470703125, "learning_rate": 5.877267536698984e-06, "loss": 91.3742, "step": 61810 }, { "epoch": 0.5113951276006121, "grad_norm": 924.7949829101562, "learning_rate": 5.875879012756845e-06, "loss": 93.8301, "step": 61820 }, { "epoch": 0.5114778508499814, "grad_norm": 830.5784301757812, "learning_rate": 5.8744904191250326e-06, "loss": 135.9706, "step": 61830 }, { "epoch": 0.5115605740993506, "grad_norm": 1287.56884765625, "learning_rate": 5.873101755914031e-06, "loss": 113.9375, "step": 61840 }, { "epoch": 0.5116432973487198, "grad_norm": 2128.367431640625, "learning_rate": 5.87171302323433e-06, "loss": 91.666, "step": 61850 }, { "epoch": 0.5117260205980891, "grad_norm": 1100.2708740234375, "learning_rate": 5.870324221196424e-06, "loss": 91.6138, "step": 61860 }, { "epoch": 0.5118087438474583, "grad_norm": 636.7507934570312, "learning_rate": 5.868935349910814e-06, "loss": 105.8906, "step": 61870 }, { "epoch": 0.5118914670968275, "grad_norm": 928.61083984375, "learning_rate": 5.867546409488006e-06, "loss": 99.2666, "step": 61880 }, { "epoch": 0.5119741903461968, "grad_norm": 562.5609130859375, "learning_rate": 5.8661574000385115e-06, "loss": 76.9907, "step": 61890 }, { "epoch": 0.512056913595566, "grad_norm": 390.1648864746094, "learning_rate": 5.864768321672848e-06, "loss": 76.5244, "step": 61900 }, { "epoch": 0.5121396368449352, "grad_norm": 944.2666625976562, "learning_rate": 5.863379174501538e-06, "loss": 98.3042, "step": 61910 }, { "epoch": 0.5122223600943046, "grad_norm": 1686.9356689453125, "learning_rate": 5.861989958635109e-06, "loss": 121.1383, "step": 61920 }, { "epoch": 0.5123050833436738, "grad_norm": 878.6798095703125, "learning_rate": 5.860600674184096e-06, "loss": 105.9373, "step": 61930 }, { "epoch": 0.512387806593043, "grad_norm": 528.4129638671875, "learning_rate": 5.859211321259036e-06, "loss": 83.7999, "step": 61940 }, { "epoch": 0.5124705298424123, "grad_norm": 1257.8131103515625, "learning_rate": 5.857821899970475e-06, "loss": 112.0766, "step": 61950 }, { "epoch": 0.5125532530917815, "grad_norm": 1426.787353515625, "learning_rate": 5.856432410428963e-06, "loss": 127.3927, "step": 61960 }, { "epoch": 0.5126359763411507, "grad_norm": 1306.6192626953125, "learning_rate": 5.8550428527450534e-06, "loss": 63.1702, "step": 61970 }, { "epoch": 0.51271869959052, "grad_norm": 1105.7908935546875, "learning_rate": 5.8536532270293076e-06, "loss": 80.9909, "step": 61980 }, { "epoch": 0.5128014228398892, "grad_norm": 1267.3624267578125, "learning_rate": 5.852263533392294e-06, "loss": 94.7298, "step": 61990 }, { "epoch": 0.5128841460892584, "grad_norm": 594.2600708007812, "learning_rate": 5.850873771944581e-06, "loss": 91.9315, "step": 62000 }, { "epoch": 0.5129668693386277, "grad_norm": 652.3613891601562, "learning_rate": 5.849483942796747e-06, "loss": 87.1766, "step": 62010 }, { "epoch": 0.5130495925879969, "grad_norm": 602.3596801757812, "learning_rate": 5.848094046059375e-06, "loss": 124.0294, "step": 62020 }, { "epoch": 0.5131323158373661, "grad_norm": 573.1605224609375, "learning_rate": 5.846704081843052e-06, "loss": 111.6119, "step": 62030 }, { "epoch": 0.5132150390867354, "grad_norm": 780.5606689453125, "learning_rate": 5.84531405025837e-06, "loss": 82.9944, "step": 62040 }, { "epoch": 0.5132977623361046, "grad_norm": 913.4967041015625, "learning_rate": 5.843923951415931e-06, "loss": 108.7144, "step": 62050 }, { "epoch": 0.5133804855854738, "grad_norm": 1110.280029296875, "learning_rate": 5.842533785426334e-06, "loss": 101.928, "step": 62060 }, { "epoch": 0.5134632088348431, "grad_norm": 1380.97314453125, "learning_rate": 5.84114355240019e-06, "loss": 95.0093, "step": 62070 }, { "epoch": 0.5135459320842123, "grad_norm": 570.7285766601562, "learning_rate": 5.839753252448115e-06, "loss": 88.7862, "step": 62080 }, { "epoch": 0.5136286553335815, "grad_norm": 1217.79931640625, "learning_rate": 5.838362885680728e-06, "loss": 94.9879, "step": 62090 }, { "epoch": 0.5137113785829507, "grad_norm": 1030.6907958984375, "learning_rate": 5.8369724522086545e-06, "loss": 88.0829, "step": 62100 }, { "epoch": 0.51379410183232, "grad_norm": 2263.951416015625, "learning_rate": 5.835581952142522e-06, "loss": 147.4353, "step": 62110 }, { "epoch": 0.5138768250816892, "grad_norm": 499.37347412109375, "learning_rate": 5.834191385592969e-06, "loss": 119.7903, "step": 62120 }, { "epoch": 0.5139595483310584, "grad_norm": 1055.9906005859375, "learning_rate": 5.8328007526706354e-06, "loss": 87.058, "step": 62130 }, { "epoch": 0.5140422715804277, "grad_norm": 1002.3789672851562, "learning_rate": 5.83141005348617e-06, "loss": 107.1515, "step": 62140 }, { "epoch": 0.5141249948297969, "grad_norm": 859.0476684570312, "learning_rate": 5.830019288150222e-06, "loss": 90.1526, "step": 62150 }, { "epoch": 0.5142077180791661, "grad_norm": 765.904052734375, "learning_rate": 5.8286284567734456e-06, "loss": 81.6313, "step": 62160 }, { "epoch": 0.5142904413285354, "grad_norm": 676.10009765625, "learning_rate": 5.827237559466508e-06, "loss": 84.7288, "step": 62170 }, { "epoch": 0.5143731645779046, "grad_norm": 1681.114990234375, "learning_rate": 5.825846596340075e-06, "loss": 122.0725, "step": 62180 }, { "epoch": 0.5144558878272738, "grad_norm": 708.9700927734375, "learning_rate": 5.824455567504817e-06, "loss": 123.205, "step": 62190 }, { "epoch": 0.5145386110766431, "grad_norm": 853.5513305664062, "learning_rate": 5.823064473071414e-06, "loss": 96.5516, "step": 62200 }, { "epoch": 0.5146213343260123, "grad_norm": 937.1123046875, "learning_rate": 5.821673313150546e-06, "loss": 98.6421, "step": 62210 }, { "epoch": 0.5147040575753815, "grad_norm": 802.1367797851562, "learning_rate": 5.820282087852906e-06, "loss": 99.0799, "step": 62220 }, { "epoch": 0.5147867808247508, "grad_norm": 451.8292236328125, "learning_rate": 5.818890797289185e-06, "loss": 90.0844, "step": 62230 }, { "epoch": 0.51486950407412, "grad_norm": 1798.909912109375, "learning_rate": 5.81749944157008e-06, "loss": 137.7885, "step": 62240 }, { "epoch": 0.5149522273234892, "grad_norm": 985.250732421875, "learning_rate": 5.816108020806297e-06, "loss": 97.535, "step": 62250 }, { "epoch": 0.5150349505728585, "grad_norm": 672.7926635742188, "learning_rate": 5.814716535108545e-06, "loss": 88.2172, "step": 62260 }, { "epoch": 0.5151176738222277, "grad_norm": 716.781982421875, "learning_rate": 5.813324984587536e-06, "loss": 106.1954, "step": 62270 }, { "epoch": 0.5152003970715969, "grad_norm": 1432.325439453125, "learning_rate": 5.811933369353992e-06, "loss": 111.049, "step": 62280 }, { "epoch": 0.5152831203209662, "grad_norm": 760.8700561523438, "learning_rate": 5.810541689518634e-06, "loss": 94.8973, "step": 62290 }, { "epoch": 0.5153658435703354, "grad_norm": 845.1988525390625, "learning_rate": 5.809149945192194e-06, "loss": 62.3175, "step": 62300 }, { "epoch": 0.5154485668197046, "grad_norm": 770.4382934570312, "learning_rate": 5.807758136485409e-06, "loss": 106.6364, "step": 62310 }, { "epoch": 0.5155312900690739, "grad_norm": 1148.9901123046875, "learning_rate": 5.8063662635090136e-06, "loss": 98.8217, "step": 62320 }, { "epoch": 0.5156140133184431, "grad_norm": 1040.4677734375, "learning_rate": 5.804974326373756e-06, "loss": 92.393, "step": 62330 }, { "epoch": 0.5156967365678123, "grad_norm": 609.6980590820312, "learning_rate": 5.803582325190387e-06, "loss": 97.1185, "step": 62340 }, { "epoch": 0.5157794598171817, "grad_norm": 799.3777465820312, "learning_rate": 5.802190260069657e-06, "loss": 87.1282, "step": 62350 }, { "epoch": 0.5158621830665509, "grad_norm": 968.39990234375, "learning_rate": 5.800798131122332e-06, "loss": 86.2219, "step": 62360 }, { "epoch": 0.51594490631592, "grad_norm": 1028.2894287109375, "learning_rate": 5.799405938459175e-06, "loss": 76.0357, "step": 62370 }, { "epoch": 0.5160276295652894, "grad_norm": 1118.3125, "learning_rate": 5.7980136821909565e-06, "loss": 91.065, "step": 62380 }, { "epoch": 0.5161103528146586, "grad_norm": 817.6798706054688, "learning_rate": 5.79662136242845e-06, "loss": 112.0168, "step": 62390 }, { "epoch": 0.5161930760640278, "grad_norm": 847.1541748046875, "learning_rate": 5.795228979282439e-06, "loss": 78.081, "step": 62400 }, { "epoch": 0.5162757993133971, "grad_norm": 474.493896484375, "learning_rate": 5.793836532863707e-06, "loss": 70.5183, "step": 62410 }, { "epoch": 0.5163585225627663, "grad_norm": 720.2383422851562, "learning_rate": 5.792444023283046e-06, "loss": 87.0467, "step": 62420 }, { "epoch": 0.5164412458121355, "grad_norm": 847.5111083984375, "learning_rate": 5.791051450651251e-06, "loss": 80.678, "step": 62430 }, { "epoch": 0.5165239690615048, "grad_norm": 1734.1534423828125, "learning_rate": 5.789658815079121e-06, "loss": 106.6263, "step": 62440 }, { "epoch": 0.516606692310874, "grad_norm": 418.1882019042969, "learning_rate": 5.788266116677464e-06, "loss": 88.7378, "step": 62450 }, { "epoch": 0.5166894155602432, "grad_norm": 1164.404541015625, "learning_rate": 5.78687335555709e-06, "loss": 109.2202, "step": 62460 }, { "epoch": 0.5167721388096125, "grad_norm": 843.881103515625, "learning_rate": 5.785480531828815e-06, "loss": 95.9288, "step": 62470 }, { "epoch": 0.5168548620589817, "grad_norm": 753.1651000976562, "learning_rate": 5.784087645603459e-06, "loss": 88.32, "step": 62480 }, { "epoch": 0.5169375853083509, "grad_norm": 488.68487548828125, "learning_rate": 5.782694696991845e-06, "loss": 77.3577, "step": 62490 }, { "epoch": 0.5170203085577202, "grad_norm": 748.185546875, "learning_rate": 5.781301686104808e-06, "loss": 99.6195, "step": 62500 }, { "epoch": 0.5171030318070894, "grad_norm": 1028.9573974609375, "learning_rate": 5.779908613053181e-06, "loss": 89.3196, "step": 62510 }, { "epoch": 0.5171857550564586, "grad_norm": 823.2115478515625, "learning_rate": 5.778515477947807e-06, "loss": 85.4209, "step": 62520 }, { "epoch": 0.5172684783058279, "grad_norm": 720.9722900390625, "learning_rate": 5.777122280899527e-06, "loss": 87.5316, "step": 62530 }, { "epoch": 0.5173512015551971, "grad_norm": 393.12646484375, "learning_rate": 5.775729022019193e-06, "loss": 132.2376, "step": 62540 }, { "epoch": 0.5174339248045663, "grad_norm": 517.8870849609375, "learning_rate": 5.774335701417662e-06, "loss": 74.0599, "step": 62550 }, { "epoch": 0.5175166480539356, "grad_norm": 779.7863159179688, "learning_rate": 5.7729423192057936e-06, "loss": 104.9648, "step": 62560 }, { "epoch": 0.5175993713033048, "grad_norm": 1225.2161865234375, "learning_rate": 5.771548875494453e-06, "loss": 73.9343, "step": 62570 }, { "epoch": 0.517682094552674, "grad_norm": 1787.88232421875, "learning_rate": 5.7701553703945055e-06, "loss": 99.922, "step": 62580 }, { "epoch": 0.5177648178020433, "grad_norm": 761.9515380859375, "learning_rate": 5.768761804016833e-06, "loss": 126.8296, "step": 62590 }, { "epoch": 0.5178475410514125, "grad_norm": 700.2348022460938, "learning_rate": 5.767368176472311e-06, "loss": 77.3953, "step": 62600 }, { "epoch": 0.5179302643007817, "grad_norm": 1287.00732421875, "learning_rate": 5.765974487871826e-06, "loss": 110.9408, "step": 62610 }, { "epoch": 0.518012987550151, "grad_norm": 1209.4278564453125, "learning_rate": 5.764580738326265e-06, "loss": 122.3806, "step": 62620 }, { "epoch": 0.5180957107995202, "grad_norm": 972.08251953125, "learning_rate": 5.763186927946523e-06, "loss": 96.5278, "step": 62630 }, { "epoch": 0.5181784340488894, "grad_norm": 939.2523193359375, "learning_rate": 5.761793056843501e-06, "loss": 96.9909, "step": 62640 }, { "epoch": 0.5182611572982587, "grad_norm": 729.4473876953125, "learning_rate": 5.760399125128102e-06, "loss": 98.4378, "step": 62650 }, { "epoch": 0.5183438805476279, "grad_norm": 780.3011474609375, "learning_rate": 5.759005132911233e-06, "loss": 78.8927, "step": 62660 }, { "epoch": 0.5184266037969971, "grad_norm": 546.9750366210938, "learning_rate": 5.75761108030381e-06, "loss": 97.8188, "step": 62670 }, { "epoch": 0.5185093270463664, "grad_norm": 898.6033325195312, "learning_rate": 5.756216967416749e-06, "loss": 73.9803, "step": 62680 }, { "epoch": 0.5185920502957356, "grad_norm": 1270.0867919921875, "learning_rate": 5.754822794360976e-06, "loss": 100.2555, "step": 62690 }, { "epoch": 0.5186747735451048, "grad_norm": 799.2184448242188, "learning_rate": 5.753428561247416e-06, "loss": 104.4311, "step": 62700 }, { "epoch": 0.5187574967944741, "grad_norm": 756.2783813476562, "learning_rate": 5.752034268187005e-06, "loss": 90.7448, "step": 62710 }, { "epoch": 0.5188402200438433, "grad_norm": 673.90576171875, "learning_rate": 5.750639915290677e-06, "loss": 100.3478, "step": 62720 }, { "epoch": 0.5189229432932125, "grad_norm": 961.5318603515625, "learning_rate": 5.749245502669375e-06, "loss": 88.5219, "step": 62730 }, { "epoch": 0.5190056665425818, "grad_norm": 700.3480834960938, "learning_rate": 5.747851030434049e-06, "loss": 85.1043, "step": 62740 }, { "epoch": 0.519088389791951, "grad_norm": 508.70166015625, "learning_rate": 5.746456498695648e-06, "loss": 95.531, "step": 62750 }, { "epoch": 0.5191711130413202, "grad_norm": 490.99737548828125, "learning_rate": 5.7450619075651305e-06, "loss": 87.5613, "step": 62760 }, { "epoch": 0.5192538362906896, "grad_norm": 1027.1727294921875, "learning_rate": 5.743667257153454e-06, "loss": 81.3641, "step": 62770 }, { "epoch": 0.5193365595400588, "grad_norm": 1106.285400390625, "learning_rate": 5.742272547571588e-06, "loss": 113.6713, "step": 62780 }, { "epoch": 0.519419282789428, "grad_norm": 657.1253662109375, "learning_rate": 5.740877778930503e-06, "loss": 92.9361, "step": 62790 }, { "epoch": 0.5195020060387973, "grad_norm": 831.34521484375, "learning_rate": 5.739482951341172e-06, "loss": 120.2162, "step": 62800 }, { "epoch": 0.5195847292881665, "grad_norm": 709.2473754882812, "learning_rate": 5.738088064914576e-06, "loss": 112.4378, "step": 62810 }, { "epoch": 0.5196674525375357, "grad_norm": 847.8458862304688, "learning_rate": 5.7366931197617e-06, "loss": 85.5061, "step": 62820 }, { "epoch": 0.5197501757869049, "grad_norm": 1197.6585693359375, "learning_rate": 5.735298115993535e-06, "loss": 93.4385, "step": 62830 }, { "epoch": 0.5198328990362742, "grad_norm": 1183.1817626953125, "learning_rate": 5.733903053721072e-06, "loss": 123.1538, "step": 62840 }, { "epoch": 0.5199156222856434, "grad_norm": 1146.695068359375, "learning_rate": 5.732507933055311e-06, "loss": 133.2361, "step": 62850 }, { "epoch": 0.5199983455350126, "grad_norm": 830.4008178710938, "learning_rate": 5.731112754107257e-06, "loss": 92.5354, "step": 62860 }, { "epoch": 0.5200810687843819, "grad_norm": 1141.0968017578125, "learning_rate": 5.729717516987916e-06, "loss": 91.7082, "step": 62870 }, { "epoch": 0.5201637920337511, "grad_norm": 860.5159301757812, "learning_rate": 5.7283222218083e-06, "loss": 84.899, "step": 62880 }, { "epoch": 0.5202465152831203, "grad_norm": 632.0242309570312, "learning_rate": 5.726926868679429e-06, "loss": 127.0738, "step": 62890 }, { "epoch": 0.5203292385324896, "grad_norm": 877.5741577148438, "learning_rate": 5.725531457712321e-06, "loss": 98.0383, "step": 62900 }, { "epoch": 0.5204119617818588, "grad_norm": 955.6021728515625, "learning_rate": 5.724135989018007e-06, "loss": 93.8473, "step": 62910 }, { "epoch": 0.520494685031228, "grad_norm": 1114.900634765625, "learning_rate": 5.722740462707515e-06, "loss": 97.0482, "step": 62920 }, { "epoch": 0.5205774082805973, "grad_norm": 1169.9739990234375, "learning_rate": 5.72134487889188e-06, "loss": 75.5919, "step": 62930 }, { "epoch": 0.5206601315299665, "grad_norm": 1042.204833984375, "learning_rate": 5.719949237682145e-06, "loss": 96.2548, "step": 62940 }, { "epoch": 0.5207428547793357, "grad_norm": 923.940185546875, "learning_rate": 5.718553539189353e-06, "loss": 120.747, "step": 62950 }, { "epoch": 0.520825578028705, "grad_norm": 773.2394409179688, "learning_rate": 5.717157783524553e-06, "loss": 107.0883, "step": 62960 }, { "epoch": 0.5209083012780742, "grad_norm": 1147.978759765625, "learning_rate": 5.7157619707988e-06, "loss": 112.3523, "step": 62970 }, { "epoch": 0.5209910245274434, "grad_norm": 869.9029541015625, "learning_rate": 5.714366101123152e-06, "loss": 91.1115, "step": 62980 }, { "epoch": 0.5210737477768127, "grad_norm": 503.1574401855469, "learning_rate": 5.712970174608671e-06, "loss": 62.8698, "step": 62990 }, { "epoch": 0.5211564710261819, "grad_norm": 567.345947265625, "learning_rate": 5.711574191366427e-06, "loss": 87.9423, "step": 63000 }, { "epoch": 0.5212391942755511, "grad_norm": 706.41748046875, "learning_rate": 5.710178151507488e-06, "loss": 73.9817, "step": 63010 }, { "epoch": 0.5213219175249204, "grad_norm": 1259.374755859375, "learning_rate": 5.708782055142934e-06, "loss": 91.3289, "step": 63020 }, { "epoch": 0.5214046407742896, "grad_norm": 1063.552001953125, "learning_rate": 5.707385902383845e-06, "loss": 120.5428, "step": 63030 }, { "epoch": 0.5214873640236588, "grad_norm": 910.1624145507812, "learning_rate": 5.7059896933413076e-06, "loss": 83.0945, "step": 63040 }, { "epoch": 0.5215700872730281, "grad_norm": 2553.767333984375, "learning_rate": 5.7045934281264085e-06, "loss": 102.2294, "step": 63050 }, { "epoch": 0.5216528105223973, "grad_norm": 1032.9830322265625, "learning_rate": 5.7031971068502425e-06, "loss": 98.3443, "step": 63060 }, { "epoch": 0.5217355337717665, "grad_norm": 837.3251342773438, "learning_rate": 5.701800729623911e-06, "loss": 105.6823, "step": 63070 }, { "epoch": 0.5218182570211358, "grad_norm": 470.49658203125, "learning_rate": 5.700404296558518e-06, "loss": 94.3055, "step": 63080 }, { "epoch": 0.521900980270505, "grad_norm": 722.3120727539062, "learning_rate": 5.699007807765169e-06, "loss": 96.7037, "step": 63090 }, { "epoch": 0.5219837035198742, "grad_norm": 1043.8006591796875, "learning_rate": 5.6976112633549764e-06, "loss": 63.1547, "step": 63100 }, { "epoch": 0.5220664267692435, "grad_norm": 559.216552734375, "learning_rate": 5.696214663439055e-06, "loss": 89.2568, "step": 63110 }, { "epoch": 0.5221491500186127, "grad_norm": 1489.68994140625, "learning_rate": 5.694818008128531e-06, "loss": 106.3069, "step": 63120 }, { "epoch": 0.5222318732679819, "grad_norm": 1087.22802734375, "learning_rate": 5.693421297534526e-06, "loss": 116.6925, "step": 63130 }, { "epoch": 0.5223145965173512, "grad_norm": 1571.4100341796875, "learning_rate": 5.69202453176817e-06, "loss": 104.0308, "step": 63140 }, { "epoch": 0.5223973197667204, "grad_norm": 1171.318359375, "learning_rate": 5.6906277109406e-06, "loss": 85.1603, "step": 63150 }, { "epoch": 0.5224800430160896, "grad_norm": 1105.7423095703125, "learning_rate": 5.689230835162949e-06, "loss": 92.7724, "step": 63160 }, { "epoch": 0.5225627662654589, "grad_norm": 2501.460693359375, "learning_rate": 5.687833904546367e-06, "loss": 124.3585, "step": 63170 }, { "epoch": 0.5226454895148281, "grad_norm": 673.11328125, "learning_rate": 5.686436919201996e-06, "loss": 95.2214, "step": 63180 }, { "epoch": 0.5227282127641973, "grad_norm": 803.44970703125, "learning_rate": 5.68503987924099e-06, "loss": 96.271, "step": 63190 }, { "epoch": 0.5228109360135667, "grad_norm": 733.796875, "learning_rate": 5.683642784774506e-06, "loss": 94.5968, "step": 63200 }, { "epoch": 0.5228936592629359, "grad_norm": 893.05615234375, "learning_rate": 5.682245635913701e-06, "loss": 93.2139, "step": 63210 }, { "epoch": 0.522976382512305, "grad_norm": 806.1270751953125, "learning_rate": 5.680848432769743e-06, "loss": 83.2208, "step": 63220 }, { "epoch": 0.5230591057616744, "grad_norm": 950.8448486328125, "learning_rate": 5.6794511754538005e-06, "loss": 102.1387, "step": 63230 }, { "epoch": 0.5231418290110436, "grad_norm": 994.8265991210938, "learning_rate": 5.6780538640770455e-06, "loss": 94.9941, "step": 63240 }, { "epoch": 0.5232245522604128, "grad_norm": 805.738037109375, "learning_rate": 5.6766564987506564e-06, "loss": 97.3443, "step": 63250 }, { "epoch": 0.5233072755097821, "grad_norm": 1825.2171630859375, "learning_rate": 5.675259079585816e-06, "loss": 93.6461, "step": 63260 }, { "epoch": 0.5233899987591513, "grad_norm": 899.5811767578125, "learning_rate": 5.673861606693708e-06, "loss": 124.3475, "step": 63270 }, { "epoch": 0.5234727220085205, "grad_norm": 738.1337890625, "learning_rate": 5.672464080185526e-06, "loss": 82.6199, "step": 63280 }, { "epoch": 0.5235554452578898, "grad_norm": 1002.6314086914062, "learning_rate": 5.671066500172462e-06, "loss": 118.9046, "step": 63290 }, { "epoch": 0.523638168507259, "grad_norm": 644.3387451171875, "learning_rate": 5.669668866765717e-06, "loss": 127.9811, "step": 63300 }, { "epoch": 0.5237208917566282, "grad_norm": 654.7725219726562, "learning_rate": 5.6682711800764935e-06, "loss": 93.9227, "step": 63310 }, { "epoch": 0.5238036150059975, "grad_norm": 973.210205078125, "learning_rate": 5.6668734402159994e-06, "loss": 84.845, "step": 63320 }, { "epoch": 0.5238863382553667, "grad_norm": 1666.18017578125, "learning_rate": 5.6654756472954464e-06, "loss": 133.6506, "step": 63330 }, { "epoch": 0.5239690615047359, "grad_norm": 1285.848388671875, "learning_rate": 5.66407780142605e-06, "loss": 102.1989, "step": 63340 }, { "epoch": 0.5240517847541052, "grad_norm": 1405.201904296875, "learning_rate": 5.66267990271903e-06, "loss": 101.3061, "step": 63350 }, { "epoch": 0.5241345080034744, "grad_norm": 995.1788940429688, "learning_rate": 5.661281951285613e-06, "loss": 80.424, "step": 63360 }, { "epoch": 0.5242172312528436, "grad_norm": 1280.261962890625, "learning_rate": 5.6598839472370245e-06, "loss": 129.4751, "step": 63370 }, { "epoch": 0.5242999545022129, "grad_norm": 1111.2105712890625, "learning_rate": 5.6584858906845e-06, "loss": 111.6386, "step": 63380 }, { "epoch": 0.5243826777515821, "grad_norm": 438.5060119628906, "learning_rate": 5.657087781739274e-06, "loss": 88.3789, "step": 63390 }, { "epoch": 0.5244654010009513, "grad_norm": 1246.3992919921875, "learning_rate": 5.6556896205125896e-06, "loss": 113.5103, "step": 63400 }, { "epoch": 0.5245481242503206, "grad_norm": 867.26220703125, "learning_rate": 5.654291407115692e-06, "loss": 90.075, "step": 63410 }, { "epoch": 0.5246308474996898, "grad_norm": 922.8386840820312, "learning_rate": 5.652893141659829e-06, "loss": 103.2854, "step": 63420 }, { "epoch": 0.524713570749059, "grad_norm": 1118.366455078125, "learning_rate": 5.651494824256256e-06, "loss": 92.4039, "step": 63430 }, { "epoch": 0.5247962939984283, "grad_norm": 1036.559326171875, "learning_rate": 5.650096455016227e-06, "loss": 77.7015, "step": 63440 }, { "epoch": 0.5248790172477975, "grad_norm": 649.5186157226562, "learning_rate": 5.648698034051009e-06, "loss": 93.3135, "step": 63450 }, { "epoch": 0.5249617404971667, "grad_norm": 685.0455322265625, "learning_rate": 5.647299561471865e-06, "loss": 88.9663, "step": 63460 }, { "epoch": 0.525044463746536, "grad_norm": 786.32470703125, "learning_rate": 5.645901037390067e-06, "loss": 108.5513, "step": 63470 }, { "epoch": 0.5251271869959052, "grad_norm": 863.156005859375, "learning_rate": 5.644502461916886e-06, "loss": 86.3591, "step": 63480 }, { "epoch": 0.5252099102452744, "grad_norm": 911.6554565429688, "learning_rate": 5.643103835163602e-06, "loss": 92.7152, "step": 63490 }, { "epoch": 0.5252926334946437, "grad_norm": 1143.3475341796875, "learning_rate": 5.641705157241497e-06, "loss": 126.4204, "step": 63500 }, { "epoch": 0.5253753567440129, "grad_norm": 1113.570556640625, "learning_rate": 5.64030642826186e-06, "loss": 106.2012, "step": 63510 }, { "epoch": 0.5254580799933821, "grad_norm": 734.0611572265625, "learning_rate": 5.6389076483359774e-06, "loss": 85.6654, "step": 63520 }, { "epoch": 0.5255408032427513, "grad_norm": 762.94677734375, "learning_rate": 5.637508817575145e-06, "loss": 111.5038, "step": 63530 }, { "epoch": 0.5256235264921206, "grad_norm": 730.315673828125, "learning_rate": 5.636109936090661e-06, "loss": 118.9952, "step": 63540 }, { "epoch": 0.5257062497414898, "grad_norm": 1400.8131103515625, "learning_rate": 5.634711003993832e-06, "loss": 109.5317, "step": 63550 }, { "epoch": 0.525788972990859, "grad_norm": 822.1387329101562, "learning_rate": 5.633312021395959e-06, "loss": 115.5602, "step": 63560 }, { "epoch": 0.5258716962402283, "grad_norm": 836.3711547851562, "learning_rate": 5.631912988408356e-06, "loss": 110.6894, "step": 63570 }, { "epoch": 0.5259544194895975, "grad_norm": 709.7254028320312, "learning_rate": 5.630513905142334e-06, "loss": 92.7573, "step": 63580 }, { "epoch": 0.5260371427389667, "grad_norm": 633.5755004882812, "learning_rate": 5.629114771709217e-06, "loss": 97.9029, "step": 63590 }, { "epoch": 0.526119865988336, "grad_norm": 1466.9072265625, "learning_rate": 5.627715588220325e-06, "loss": 82.6277, "step": 63600 }, { "epoch": 0.5262025892377052, "grad_norm": 1547.8734130859375, "learning_rate": 5.626316354786982e-06, "loss": 113.497, "step": 63610 }, { "epoch": 0.5262853124870744, "grad_norm": 691.7115478515625, "learning_rate": 5.624917071520524e-06, "loss": 99.4881, "step": 63620 }, { "epoch": 0.5263680357364438, "grad_norm": 1155.3173828125, "learning_rate": 5.62351773853228e-06, "loss": 85.3007, "step": 63630 }, { "epoch": 0.526450758985813, "grad_norm": 1123.159423828125, "learning_rate": 5.6221183559335935e-06, "loss": 103.121, "step": 63640 }, { "epoch": 0.5265334822351821, "grad_norm": 1173.2393798828125, "learning_rate": 5.6207189238358025e-06, "loss": 88.9635, "step": 63650 }, { "epoch": 0.5266162054845515, "grad_norm": 1155.658935546875, "learning_rate": 5.619319442350256e-06, "loss": 100.2915, "step": 63660 }, { "epoch": 0.5266989287339207, "grad_norm": 1091.1961669921875, "learning_rate": 5.617919911588304e-06, "loss": 92.6392, "step": 63670 }, { "epoch": 0.5267816519832899, "grad_norm": 922.4959106445312, "learning_rate": 5.616520331661301e-06, "loss": 112.2287, "step": 63680 }, { "epoch": 0.5268643752326592, "grad_norm": 765.5256958007812, "learning_rate": 5.615120702680604e-06, "loss": 98.105, "step": 63690 }, { "epoch": 0.5269470984820284, "grad_norm": 1066.0155029296875, "learning_rate": 5.6137210247575754e-06, "loss": 103.8472, "step": 63700 }, { "epoch": 0.5270298217313976, "grad_norm": 1942.541015625, "learning_rate": 5.6123212980035825e-06, "loss": 109.4006, "step": 63710 }, { "epoch": 0.5271125449807669, "grad_norm": 926.7212524414062, "learning_rate": 5.610921522529994e-06, "loss": 103.9244, "step": 63720 }, { "epoch": 0.5271952682301361, "grad_norm": 811.9002685546875, "learning_rate": 5.609521698448183e-06, "loss": 96.8324, "step": 63730 }, { "epoch": 0.5272779914795053, "grad_norm": 774.9049682617188, "learning_rate": 5.608121825869528e-06, "loss": 173.0452, "step": 63740 }, { "epoch": 0.5273607147288746, "grad_norm": 1026.793212890625, "learning_rate": 5.60672190490541e-06, "loss": 94.8755, "step": 63750 }, { "epoch": 0.5274434379782438, "grad_norm": 405.6708679199219, "learning_rate": 5.6053219356672155e-06, "loss": 86.6626, "step": 63760 }, { "epoch": 0.527526161227613, "grad_norm": 890.8868408203125, "learning_rate": 5.603921918266332e-06, "loss": 87.5938, "step": 63770 }, { "epoch": 0.5276088844769823, "grad_norm": 737.17236328125, "learning_rate": 5.602521852814152e-06, "loss": 73.9403, "step": 63780 }, { "epoch": 0.5276916077263515, "grad_norm": 809.0032348632812, "learning_rate": 5.6011217394220755e-06, "loss": 79.7671, "step": 63790 }, { "epoch": 0.5277743309757207, "grad_norm": 1906.908203125, "learning_rate": 5.599721578201499e-06, "loss": 82.5073, "step": 63800 }, { "epoch": 0.52785705422509, "grad_norm": 1029.4927978515625, "learning_rate": 5.59832136926383e-06, "loss": 106.927, "step": 63810 }, { "epoch": 0.5279397774744592, "grad_norm": 864.1856079101562, "learning_rate": 5.5969211127204744e-06, "loss": 100.7081, "step": 63820 }, { "epoch": 0.5280225007238284, "grad_norm": 958.0577392578125, "learning_rate": 5.595520808682848e-06, "loss": 81.9101, "step": 63830 }, { "epoch": 0.5281052239731977, "grad_norm": 1057.2666015625, "learning_rate": 5.594120457262361e-06, "loss": 109.2872, "step": 63840 }, { "epoch": 0.5281879472225669, "grad_norm": 963.5335083007812, "learning_rate": 5.592720058570438e-06, "loss": 95.8562, "step": 63850 }, { "epoch": 0.5282706704719361, "grad_norm": 841.0330810546875, "learning_rate": 5.591319612718498e-06, "loss": 76.5216, "step": 63860 }, { "epoch": 0.5283533937213054, "grad_norm": 1039.90771484375, "learning_rate": 5.589919119817971e-06, "loss": 107.0152, "step": 63870 }, { "epoch": 0.5284361169706746, "grad_norm": 1642.884033203125, "learning_rate": 5.588518579980288e-06, "loss": 140.7489, "step": 63880 }, { "epoch": 0.5285188402200438, "grad_norm": 1069.137451171875, "learning_rate": 5.587117993316882e-06, "loss": 101.0652, "step": 63890 }, { "epoch": 0.5286015634694131, "grad_norm": 494.1618957519531, "learning_rate": 5.585717359939192e-06, "loss": 85.4378, "step": 63900 }, { "epoch": 0.5286842867187823, "grad_norm": 1175.2783203125, "learning_rate": 5.584316679958659e-06, "loss": 103.4738, "step": 63910 }, { "epoch": 0.5287670099681515, "grad_norm": 927.4609375, "learning_rate": 5.58291595348673e-06, "loss": 104.5927, "step": 63920 }, { "epoch": 0.5288497332175208, "grad_norm": 832.366455078125, "learning_rate": 5.581515180634853e-06, "loss": 80.5153, "step": 63930 }, { "epoch": 0.52893245646689, "grad_norm": 1159.866943359375, "learning_rate": 5.580114361514484e-06, "loss": 90.9927, "step": 63940 }, { "epoch": 0.5290151797162592, "grad_norm": 938.1612548828125, "learning_rate": 5.5787134962370755e-06, "loss": 67.0976, "step": 63950 }, { "epoch": 0.5290979029656285, "grad_norm": 965.9879150390625, "learning_rate": 5.57731258491409e-06, "loss": 91.4234, "step": 63960 }, { "epoch": 0.5291806262149977, "grad_norm": 816.1832885742188, "learning_rate": 5.575911627656993e-06, "loss": 87.2834, "step": 63970 }, { "epoch": 0.5292633494643669, "grad_norm": 424.278564453125, "learning_rate": 5.5745106245772506e-06, "loss": 82.5088, "step": 63980 }, { "epoch": 0.5293460727137362, "grad_norm": 1383.386962890625, "learning_rate": 5.573109575786334e-06, "loss": 99.203, "step": 63990 }, { "epoch": 0.5294287959631054, "grad_norm": 1369.8514404296875, "learning_rate": 5.571708481395719e-06, "loss": 87.6147, "step": 64000 }, { "epoch": 0.5295115192124746, "grad_norm": 679.2786865234375, "learning_rate": 5.570307341516882e-06, "loss": 109.6216, "step": 64010 }, { "epoch": 0.529594242461844, "grad_norm": 688.9489135742188, "learning_rate": 5.568906156261309e-06, "loss": 78.812, "step": 64020 }, { "epoch": 0.5296769657112131, "grad_norm": 716.52880859375, "learning_rate": 5.567504925740484e-06, "loss": 85.4848, "step": 64030 }, { "epoch": 0.5297596889605823, "grad_norm": 1037.4881591796875, "learning_rate": 5.566103650065897e-06, "loss": 98.9546, "step": 64040 }, { "epoch": 0.5298424122099517, "grad_norm": 835.7286376953125, "learning_rate": 5.564702329349041e-06, "loss": 104.4599, "step": 64050 }, { "epoch": 0.5299251354593209, "grad_norm": 1021.035888671875, "learning_rate": 5.56330096370141e-06, "loss": 82.6299, "step": 64060 }, { "epoch": 0.53000785870869, "grad_norm": 469.7624816894531, "learning_rate": 5.561899553234509e-06, "loss": 90.2693, "step": 64070 }, { "epoch": 0.5300905819580594, "grad_norm": 698.7485961914062, "learning_rate": 5.560498098059838e-06, "loss": 116.0395, "step": 64080 }, { "epoch": 0.5301733052074286, "grad_norm": 1172.5970458984375, "learning_rate": 5.559096598288906e-06, "loss": 70.6558, "step": 64090 }, { "epoch": 0.5302560284567978, "grad_norm": 901.9520263671875, "learning_rate": 5.557695054033223e-06, "loss": 85.0805, "step": 64100 }, { "epoch": 0.5303387517061671, "grad_norm": 1102.83056640625, "learning_rate": 5.556293465404304e-06, "loss": 104.11, "step": 64110 }, { "epoch": 0.5304214749555363, "grad_norm": 862.3082275390625, "learning_rate": 5.554891832513668e-06, "loss": 69.0226, "step": 64120 }, { "epoch": 0.5305041982049055, "grad_norm": 850.2838745117188, "learning_rate": 5.553490155472835e-06, "loss": 88.536, "step": 64130 }, { "epoch": 0.5305869214542748, "grad_norm": 1342.24951171875, "learning_rate": 5.55208843439333e-06, "loss": 83.8514, "step": 64140 }, { "epoch": 0.530669644703644, "grad_norm": 704.4754028320312, "learning_rate": 5.550686669386683e-06, "loss": 52.0388, "step": 64150 }, { "epoch": 0.5307523679530132, "grad_norm": 709.6826171875, "learning_rate": 5.549284860564425e-06, "loss": 151.5818, "step": 64160 }, { "epoch": 0.5308350912023825, "grad_norm": 891.2298583984375, "learning_rate": 5.547883008038091e-06, "loss": 132.4302, "step": 64170 }, { "epoch": 0.5309178144517517, "grad_norm": 668.7271728515625, "learning_rate": 5.54648111191922e-06, "loss": 102.1614, "step": 64180 }, { "epoch": 0.5310005377011209, "grad_norm": 1067.2647705078125, "learning_rate": 5.545079172319355e-06, "loss": 157.2434, "step": 64190 }, { "epoch": 0.5310832609504902, "grad_norm": 731.7130737304688, "learning_rate": 5.543677189350043e-06, "loss": 86.4254, "step": 64200 }, { "epoch": 0.5311659841998594, "grad_norm": 895.8660888671875, "learning_rate": 5.542275163122831e-06, "loss": 81.3476, "step": 64210 }, { "epoch": 0.5312487074492286, "grad_norm": 549.8636474609375, "learning_rate": 5.540873093749274e-06, "loss": 91.9869, "step": 64220 }, { "epoch": 0.5313314306985979, "grad_norm": 711.4556274414062, "learning_rate": 5.539470981340926e-06, "loss": 80.405, "step": 64230 }, { "epoch": 0.5314141539479671, "grad_norm": 565.5111083984375, "learning_rate": 5.538068826009349e-06, "loss": 95.2233, "step": 64240 }, { "epoch": 0.5314968771973363, "grad_norm": 1099.053955078125, "learning_rate": 5.536666627866104e-06, "loss": 78.3078, "step": 64250 }, { "epoch": 0.5315796004467055, "grad_norm": 1019.4757080078125, "learning_rate": 5.53526438702276e-06, "loss": 100.4306, "step": 64260 }, { "epoch": 0.5316623236960748, "grad_norm": 684.2471313476562, "learning_rate": 5.533862103590883e-06, "loss": 81.8137, "step": 64270 }, { "epoch": 0.531745046945444, "grad_norm": 1522.2894287109375, "learning_rate": 5.532459777682051e-06, "loss": 85.9613, "step": 64280 }, { "epoch": 0.5318277701948132, "grad_norm": 403.1702880859375, "learning_rate": 5.5310574094078365e-06, "loss": 84.0117, "step": 64290 }, { "epoch": 0.5319104934441825, "grad_norm": 679.674072265625, "learning_rate": 5.529654998879821e-06, "loss": 59.2508, "step": 64300 }, { "epoch": 0.5319932166935517, "grad_norm": 565.5519409179688, "learning_rate": 5.528252546209588e-06, "loss": 76.0347, "step": 64310 }, { "epoch": 0.5320759399429209, "grad_norm": 939.0302124023438, "learning_rate": 5.526850051508725e-06, "loss": 84.2198, "step": 64320 }, { "epoch": 0.5321586631922902, "grad_norm": 1065.8779296875, "learning_rate": 5.525447514888822e-06, "loss": 146.9439, "step": 64330 }, { "epoch": 0.5322413864416594, "grad_norm": 1845.5435791015625, "learning_rate": 5.52404493646147e-06, "loss": 111.1832, "step": 64340 }, { "epoch": 0.5323241096910286, "grad_norm": 1164.8885498046875, "learning_rate": 5.522642316338268e-06, "loss": 89.9883, "step": 64350 }, { "epoch": 0.5324068329403979, "grad_norm": 661.2352294921875, "learning_rate": 5.521239654630816e-06, "loss": 92.0326, "step": 64360 }, { "epoch": 0.5324895561897671, "grad_norm": 995.6377563476562, "learning_rate": 5.519836951450716e-06, "loss": 120.3884, "step": 64370 }, { "epoch": 0.5325722794391363, "grad_norm": 664.2215576171875, "learning_rate": 5.518434206909577e-06, "loss": 102.107, "step": 64380 }, { "epoch": 0.5326550026885056, "grad_norm": 934.103759765625, "learning_rate": 5.517031421119006e-06, "loss": 73.7082, "step": 64390 }, { "epoch": 0.5327377259378748, "grad_norm": 702.5838012695312, "learning_rate": 5.5156285941906175e-06, "loss": 93.3802, "step": 64400 }, { "epoch": 0.532820449187244, "grad_norm": 627.2533569335938, "learning_rate": 5.51422572623603e-06, "loss": 101.0479, "step": 64410 }, { "epoch": 0.5329031724366133, "grad_norm": 362.7051696777344, "learning_rate": 5.512822817366859e-06, "loss": 79.3406, "step": 64420 }, { "epoch": 0.5329858956859825, "grad_norm": 895.7844848632812, "learning_rate": 5.511419867694733e-06, "loss": 91.442, "step": 64430 }, { "epoch": 0.5330686189353517, "grad_norm": 726.357421875, "learning_rate": 5.510016877331271e-06, "loss": 93.3173, "step": 64440 }, { "epoch": 0.533151342184721, "grad_norm": 779.744873046875, "learning_rate": 5.50861384638811e-06, "loss": 88.6204, "step": 64450 }, { "epoch": 0.5332340654340902, "grad_norm": 591.9418334960938, "learning_rate": 5.50721077497688e-06, "loss": 72.3884, "step": 64460 }, { "epoch": 0.5333167886834594, "grad_norm": 1127.8675537109375, "learning_rate": 5.505807663209215e-06, "loss": 124.7344, "step": 64470 }, { "epoch": 0.5333995119328288, "grad_norm": 932.3980102539062, "learning_rate": 5.504404511196755e-06, "loss": 89.4976, "step": 64480 }, { "epoch": 0.533482235182198, "grad_norm": 972.6319580078125, "learning_rate": 5.503001319051142e-06, "loss": 84.1025, "step": 64490 }, { "epoch": 0.5335649584315671, "grad_norm": 898.1183471679688, "learning_rate": 5.5015980868840254e-06, "loss": 126.5537, "step": 64500 }, { "epoch": 0.5336476816809365, "grad_norm": 1008.814208984375, "learning_rate": 5.500194814807051e-06, "loss": 94.5552, "step": 64510 }, { "epoch": 0.5337304049303057, "grad_norm": 1275.499267578125, "learning_rate": 5.498791502931868e-06, "loss": 98.7025, "step": 64520 }, { "epoch": 0.5338131281796749, "grad_norm": 939.2733154296875, "learning_rate": 5.497388151370136e-06, "loss": 120.4424, "step": 64530 }, { "epoch": 0.5338958514290442, "grad_norm": 2313.487548828125, "learning_rate": 5.495984760233511e-06, "loss": 104.2078, "step": 64540 }, { "epoch": 0.5339785746784134, "grad_norm": 2875.86474609375, "learning_rate": 5.494581329633656e-06, "loss": 118.5582, "step": 64550 }, { "epoch": 0.5340612979277826, "grad_norm": 1355.4534912109375, "learning_rate": 5.493177859682234e-06, "loss": 119.3427, "step": 64560 }, { "epoch": 0.5341440211771519, "grad_norm": 643.4739990234375, "learning_rate": 5.491774350490912e-06, "loss": 76.2629, "step": 64570 }, { "epoch": 0.5342267444265211, "grad_norm": 814.0101928710938, "learning_rate": 5.490370802171362e-06, "loss": 102.4891, "step": 64580 }, { "epoch": 0.5343094676758903, "grad_norm": 653.1177978515625, "learning_rate": 5.488967214835259e-06, "loss": 104.6328, "step": 64590 }, { "epoch": 0.5343921909252596, "grad_norm": 650.8169555664062, "learning_rate": 5.487563588594278e-06, "loss": 106.7863, "step": 64600 }, { "epoch": 0.5344749141746288, "grad_norm": 875.9237060546875, "learning_rate": 5.4861599235601e-06, "loss": 78.3624, "step": 64610 }, { "epoch": 0.534557637423998, "grad_norm": 1083.6778564453125, "learning_rate": 5.484756219844408e-06, "loss": 117.263, "step": 64620 }, { "epoch": 0.5346403606733673, "grad_norm": 751.8464965820312, "learning_rate": 5.483352477558889e-06, "loss": 113.5279, "step": 64630 }, { "epoch": 0.5347230839227365, "grad_norm": 1062.31982421875, "learning_rate": 5.48194869681523e-06, "loss": 103.9293, "step": 64640 }, { "epoch": 0.5348058071721057, "grad_norm": 515.4086303710938, "learning_rate": 5.480544877725127e-06, "loss": 97.1701, "step": 64650 }, { "epoch": 0.534888530421475, "grad_norm": 725.5648193359375, "learning_rate": 5.479141020400271e-06, "loss": 76.7176, "step": 64660 }, { "epoch": 0.5349712536708442, "grad_norm": 719.1748657226562, "learning_rate": 5.477737124952366e-06, "loss": 96.5835, "step": 64670 }, { "epoch": 0.5350539769202134, "grad_norm": 916.6452026367188, "learning_rate": 5.476333191493108e-06, "loss": 85.3995, "step": 64680 }, { "epoch": 0.5351367001695827, "grad_norm": 436.0997619628906, "learning_rate": 5.474929220134205e-06, "loss": 86.9272, "step": 64690 }, { "epoch": 0.5352194234189519, "grad_norm": 1398.6942138671875, "learning_rate": 5.473525210987363e-06, "loss": 91.1325, "step": 64700 }, { "epoch": 0.5353021466683211, "grad_norm": 1058.8551025390625, "learning_rate": 5.472121164164295e-06, "loss": 80.0483, "step": 64710 }, { "epoch": 0.5353848699176904, "grad_norm": 784.1776123046875, "learning_rate": 5.47071707977671e-06, "loss": 115.0421, "step": 64720 }, { "epoch": 0.5354675931670596, "grad_norm": 1069.0155029296875, "learning_rate": 5.46931295793633e-06, "loss": 97.4913, "step": 64730 }, { "epoch": 0.5355503164164288, "grad_norm": 1227.8470458984375, "learning_rate": 5.46790879875487e-06, "loss": 98.9259, "step": 64740 }, { "epoch": 0.5356330396657981, "grad_norm": 632.8845825195312, "learning_rate": 5.466504602344055e-06, "loss": 74.2294, "step": 64750 }, { "epoch": 0.5357157629151673, "grad_norm": 1411.2496337890625, "learning_rate": 5.465100368815609e-06, "loss": 109.9609, "step": 64760 }, { "epoch": 0.5357984861645365, "grad_norm": 819.7656860351562, "learning_rate": 5.463696098281262e-06, "loss": 113.5461, "step": 64770 }, { "epoch": 0.5358812094139058, "grad_norm": 576.60546875, "learning_rate": 5.462291790852744e-06, "loss": 83.2793, "step": 64780 }, { "epoch": 0.535963932663275, "grad_norm": 563.931640625, "learning_rate": 5.46088744664179e-06, "loss": 112.9462, "step": 64790 }, { "epoch": 0.5360466559126442, "grad_norm": 1462.51416015625, "learning_rate": 5.459483065760138e-06, "loss": 93.2692, "step": 64800 }, { "epoch": 0.5361293791620135, "grad_norm": 717.8792114257812, "learning_rate": 5.458078648319526e-06, "loss": 72.0933, "step": 64810 }, { "epoch": 0.5362121024113827, "grad_norm": 873.50341796875, "learning_rate": 5.456674194431698e-06, "loss": 112.2229, "step": 64820 }, { "epoch": 0.5362948256607519, "grad_norm": 710.7631225585938, "learning_rate": 5.455269704208401e-06, "loss": 66.035, "step": 64830 }, { "epoch": 0.5363775489101212, "grad_norm": 909.28564453125, "learning_rate": 5.453865177761384e-06, "loss": 100.6332, "step": 64840 }, { "epoch": 0.5364602721594904, "grad_norm": 1102.3038330078125, "learning_rate": 5.4524606152023975e-06, "loss": 100.337, "step": 64850 }, { "epoch": 0.5365429954088596, "grad_norm": 2068.275390625, "learning_rate": 5.4510560166431935e-06, "loss": 116.3672, "step": 64860 }, { "epoch": 0.536625718658229, "grad_norm": 770.456787109375, "learning_rate": 5.449651382195535e-06, "loss": 93.339, "step": 64870 }, { "epoch": 0.5367084419075981, "grad_norm": 614.5863037109375, "learning_rate": 5.448246711971178e-06, "loss": 112.4163, "step": 64880 }, { "epoch": 0.5367911651569673, "grad_norm": 853.5326538085938, "learning_rate": 5.44684200608189e-06, "loss": 110.5457, "step": 64890 }, { "epoch": 0.5368738884063367, "grad_norm": 718.2283935546875, "learning_rate": 5.445437264639433e-06, "loss": 94.2823, "step": 64900 }, { "epoch": 0.5369566116557059, "grad_norm": 723.5383911132812, "learning_rate": 5.444032487755575e-06, "loss": 93.2752, "step": 64910 }, { "epoch": 0.537039334905075, "grad_norm": 694.3563842773438, "learning_rate": 5.442627675542092e-06, "loss": 79.1228, "step": 64920 }, { "epoch": 0.5371220581544444, "grad_norm": 1094.6685791015625, "learning_rate": 5.441222828110756e-06, "loss": 97.7163, "step": 64930 }, { "epoch": 0.5372047814038136, "grad_norm": 809.086669921875, "learning_rate": 5.439817945573345e-06, "loss": 78.5984, "step": 64940 }, { "epoch": 0.5372875046531828, "grad_norm": 1463.13037109375, "learning_rate": 5.438413028041637e-06, "loss": 137.7098, "step": 64950 }, { "epoch": 0.5373702279025521, "grad_norm": 686.939697265625, "learning_rate": 5.4370080756274155e-06, "loss": 119.1664, "step": 64960 }, { "epoch": 0.5374529511519213, "grad_norm": 1107.8900146484375, "learning_rate": 5.435603088442471e-06, "loss": 95.0079, "step": 64970 }, { "epoch": 0.5375356744012905, "grad_norm": 705.9915161132812, "learning_rate": 5.434198066598585e-06, "loss": 78.4787, "step": 64980 }, { "epoch": 0.5376183976506597, "grad_norm": 954.3821411132812, "learning_rate": 5.4327930102075525e-06, "loss": 89.3804, "step": 64990 }, { "epoch": 0.537701120900029, "grad_norm": 947.5150146484375, "learning_rate": 5.431387919381166e-06, "loss": 84.6999, "step": 65000 }, { "epoch": 0.5377838441493982, "grad_norm": 356.02276611328125, "learning_rate": 5.429982794231221e-06, "loss": 107.2157, "step": 65010 }, { "epoch": 0.5378665673987674, "grad_norm": 1482.7928466796875, "learning_rate": 5.428577634869521e-06, "loss": 92.7534, "step": 65020 }, { "epoch": 0.5379492906481367, "grad_norm": 966.0228271484375, "learning_rate": 5.427172441407864e-06, "loss": 112.3402, "step": 65030 }, { "epoch": 0.5380320138975059, "grad_norm": 688.48583984375, "learning_rate": 5.425767213958057e-06, "loss": 67.1227, "step": 65040 }, { "epoch": 0.5381147371468751, "grad_norm": 847.5924072265625, "learning_rate": 5.424361952631907e-06, "loss": 125.7834, "step": 65050 }, { "epoch": 0.5381974603962444, "grad_norm": 2318.634033203125, "learning_rate": 5.422956657541224e-06, "loss": 103.7072, "step": 65060 }, { "epoch": 0.5382801836456136, "grad_norm": 1350.766357421875, "learning_rate": 5.421551328797821e-06, "loss": 96.6112, "step": 65070 }, { "epoch": 0.5383629068949828, "grad_norm": 693.9425659179688, "learning_rate": 5.420145966513513e-06, "loss": 59.4491, "step": 65080 }, { "epoch": 0.5384456301443521, "grad_norm": 1209.9505615234375, "learning_rate": 5.418740570800117e-06, "loss": 100.2799, "step": 65090 }, { "epoch": 0.5385283533937213, "grad_norm": 918.7635498046875, "learning_rate": 5.4173351417694575e-06, "loss": 98.6169, "step": 65100 }, { "epoch": 0.5386110766430905, "grad_norm": 681.4931640625, "learning_rate": 5.415929679533356e-06, "loss": 95.7693, "step": 65110 }, { "epoch": 0.5386937998924598, "grad_norm": 583.2526245117188, "learning_rate": 5.414524184203638e-06, "loss": 91.5053, "step": 65120 }, { "epoch": 0.538776523141829, "grad_norm": 2256.79736328125, "learning_rate": 5.4131186558921335e-06, "loss": 111.124, "step": 65130 }, { "epoch": 0.5388592463911982, "grad_norm": 1016.837890625, "learning_rate": 5.411713094710673e-06, "loss": 98.1128, "step": 65140 }, { "epoch": 0.5389419696405675, "grad_norm": 914.6116943359375, "learning_rate": 5.410307500771092e-06, "loss": 74.292, "step": 65150 }, { "epoch": 0.5390246928899367, "grad_norm": 1791.11962890625, "learning_rate": 5.4089018741852264e-06, "loss": 103.5447, "step": 65160 }, { "epoch": 0.5391074161393059, "grad_norm": 1446.0499267578125, "learning_rate": 5.407496215064915e-06, "loss": 91.7238, "step": 65170 }, { "epoch": 0.5391901393886752, "grad_norm": 865.380859375, "learning_rate": 5.406090523521999e-06, "loss": 127.9576, "step": 65180 }, { "epoch": 0.5392728626380444, "grad_norm": 1105.864013671875, "learning_rate": 5.404684799668325e-06, "loss": 94.1973, "step": 65190 }, { "epoch": 0.5393555858874136, "grad_norm": 675.471435546875, "learning_rate": 5.403279043615738e-06, "loss": 110.8662, "step": 65200 }, { "epoch": 0.5394383091367829, "grad_norm": 0.0, "learning_rate": 5.4018732554760875e-06, "loss": 74.0708, "step": 65210 }, { "epoch": 0.5395210323861521, "grad_norm": 609.0394897460938, "learning_rate": 5.400467435361227e-06, "loss": 110.9384, "step": 65220 }, { "epoch": 0.5396037556355213, "grad_norm": 674.0571899414062, "learning_rate": 5.399061583383013e-06, "loss": 129.5516, "step": 65230 }, { "epoch": 0.5396864788848906, "grad_norm": 1786.3568115234375, "learning_rate": 5.3976556996532965e-06, "loss": 100.5763, "step": 65240 }, { "epoch": 0.5397692021342598, "grad_norm": 1053.052001953125, "learning_rate": 5.396249784283943e-06, "loss": 79.2251, "step": 65250 }, { "epoch": 0.539851925383629, "grad_norm": 659.5496215820312, "learning_rate": 5.394843837386812e-06, "loss": 93.9248, "step": 65260 }, { "epoch": 0.5399346486329983, "grad_norm": 1476.619140625, "learning_rate": 5.39343785907377e-06, "loss": 69.9831, "step": 65270 }, { "epoch": 0.5400173718823675, "grad_norm": 873.2113647460938, "learning_rate": 5.392031849456683e-06, "loss": 97.2353, "step": 65280 }, { "epoch": 0.5401000951317367, "grad_norm": 674.3704223632812, "learning_rate": 5.39062580864742e-06, "loss": 83.2568, "step": 65290 }, { "epoch": 0.540182818381106, "grad_norm": 2080.84765625, "learning_rate": 5.3892197367578535e-06, "loss": 76.2265, "step": 65300 }, { "epoch": 0.5402655416304752, "grad_norm": 442.13330078125, "learning_rate": 5.38781363389986e-06, "loss": 95.2125, "step": 65310 }, { "epoch": 0.5403482648798444, "grad_norm": 638.2518920898438, "learning_rate": 5.386407500185316e-06, "loss": 72.7291, "step": 65320 }, { "epoch": 0.5404309881292138, "grad_norm": 841.8578491210938, "learning_rate": 5.3850013357261e-06, "loss": 85.5195, "step": 65330 }, { "epoch": 0.540513711378583, "grad_norm": 486.00140380859375, "learning_rate": 5.383595140634093e-06, "loss": 103.8048, "step": 65340 }, { "epoch": 0.5405964346279521, "grad_norm": 945.3947143554688, "learning_rate": 5.382188915021182e-06, "loss": 105.8814, "step": 65350 }, { "epoch": 0.5406791578773215, "grad_norm": 620.21435546875, "learning_rate": 5.380782658999256e-06, "loss": 61.2339, "step": 65360 }, { "epoch": 0.5407618811266907, "grad_norm": 1061.0386962890625, "learning_rate": 5.379376372680199e-06, "loss": 87.4281, "step": 65370 }, { "epoch": 0.5408446043760599, "grad_norm": 906.3760986328125, "learning_rate": 5.377970056175905e-06, "loss": 96.8989, "step": 65380 }, { "epoch": 0.5409273276254292, "grad_norm": 621.90283203125, "learning_rate": 5.376563709598267e-06, "loss": 70.7825, "step": 65390 }, { "epoch": 0.5410100508747984, "grad_norm": 818.9396362304688, "learning_rate": 5.3751573330591855e-06, "loss": 101.6279, "step": 65400 }, { "epoch": 0.5410927741241676, "grad_norm": 859.1585693359375, "learning_rate": 5.3737509266705555e-06, "loss": 107.53, "step": 65410 }, { "epoch": 0.5411754973735369, "grad_norm": 423.7478942871094, "learning_rate": 5.37234449054428e-06, "loss": 88.1217, "step": 65420 }, { "epoch": 0.5412582206229061, "grad_norm": 1409.0037841796875, "learning_rate": 5.370938024792262e-06, "loss": 91.726, "step": 65430 }, { "epoch": 0.5413409438722753, "grad_norm": 1655.0394287109375, "learning_rate": 5.369531529526406e-06, "loss": 95.8715, "step": 65440 }, { "epoch": 0.5414236671216446, "grad_norm": 825.59326171875, "learning_rate": 5.3681250048586246e-06, "loss": 93.8956, "step": 65450 }, { "epoch": 0.5415063903710138, "grad_norm": 1459.590576171875, "learning_rate": 5.366718450900825e-06, "loss": 122.376, "step": 65460 }, { "epoch": 0.541589113620383, "grad_norm": 586.8179321289062, "learning_rate": 5.365311867764922e-06, "loss": 80.8746, "step": 65470 }, { "epoch": 0.5416718368697523, "grad_norm": 973.2362060546875, "learning_rate": 5.363905255562828e-06, "loss": 163.7341, "step": 65480 }, { "epoch": 0.5417545601191215, "grad_norm": 664.8019409179688, "learning_rate": 5.362498614406466e-06, "loss": 145.3861, "step": 65490 }, { "epoch": 0.5418372833684907, "grad_norm": 776.0678100585938, "learning_rate": 5.361091944407751e-06, "loss": 91.7549, "step": 65500 }, { "epoch": 0.54192000661786, "grad_norm": 998.9888305664062, "learning_rate": 5.3596852456786075e-06, "loss": 107.9666, "step": 65510 }, { "epoch": 0.5420027298672292, "grad_norm": 866.9566040039062, "learning_rate": 5.35827851833096e-06, "loss": 93.1625, "step": 65520 }, { "epoch": 0.5420854531165984, "grad_norm": 372.51800537109375, "learning_rate": 5.356871762476735e-06, "loss": 89.3893, "step": 65530 }, { "epoch": 0.5421681763659677, "grad_norm": 1260.4573974609375, "learning_rate": 5.355464978227861e-06, "loss": 117.8481, "step": 65540 }, { "epoch": 0.5422508996153369, "grad_norm": 911.54296875, "learning_rate": 5.354058165696271e-06, "loss": 107.3121, "step": 65550 }, { "epoch": 0.5423336228647061, "grad_norm": 1118.6298828125, "learning_rate": 5.352651324993897e-06, "loss": 120.4601, "step": 65560 }, { "epoch": 0.5424163461140754, "grad_norm": 873.2643432617188, "learning_rate": 5.351244456232676e-06, "loss": 133.8039, "step": 65570 }, { "epoch": 0.5424990693634446, "grad_norm": 803.547119140625, "learning_rate": 5.349837559524546e-06, "loss": 113.3396, "step": 65580 }, { "epoch": 0.5425817926128138, "grad_norm": 838.561767578125, "learning_rate": 5.3484306349814455e-06, "loss": 106.6709, "step": 65590 }, { "epoch": 0.5426645158621831, "grad_norm": 929.80029296875, "learning_rate": 5.34702368271532e-06, "loss": 93.5446, "step": 65600 }, { "epoch": 0.5427472391115523, "grad_norm": 1133.1529541015625, "learning_rate": 5.345616702838111e-06, "loss": 77.5028, "step": 65610 }, { "epoch": 0.5428299623609215, "grad_norm": 945.4187622070312, "learning_rate": 5.344209695461768e-06, "loss": 85.2792, "step": 65620 }, { "epoch": 0.5429126856102908, "grad_norm": 798.3004150390625, "learning_rate": 5.3428026606982396e-06, "loss": 75.1708, "step": 65630 }, { "epoch": 0.54299540885966, "grad_norm": 840.9337158203125, "learning_rate": 5.341395598659477e-06, "loss": 139.3945, "step": 65640 }, { "epoch": 0.5430781321090292, "grad_norm": 7416.7548828125, "learning_rate": 5.339988509457432e-06, "loss": 101.7888, "step": 65650 }, { "epoch": 0.5431608553583985, "grad_norm": 914.0125732421875, "learning_rate": 5.338581393204064e-06, "loss": 94.0368, "step": 65660 }, { "epoch": 0.5432435786077677, "grad_norm": 1436.60888671875, "learning_rate": 5.337174250011326e-06, "loss": 123.4113, "step": 65670 }, { "epoch": 0.5433263018571369, "grad_norm": 1032.3555908203125, "learning_rate": 5.3357670799911805e-06, "loss": 87.8645, "step": 65680 }, { "epoch": 0.5434090251065062, "grad_norm": 958.6307983398438, "learning_rate": 5.334359883255591e-06, "loss": 83.8339, "step": 65690 }, { "epoch": 0.5434917483558754, "grad_norm": 968.6178588867188, "learning_rate": 5.33295265991652e-06, "loss": 102.6183, "step": 65700 }, { "epoch": 0.5435744716052446, "grad_norm": 601.3204345703125, "learning_rate": 5.331545410085933e-06, "loss": 74.607, "step": 65710 }, { "epoch": 0.5436571948546138, "grad_norm": 1049.9119873046875, "learning_rate": 5.330138133875799e-06, "loss": 78.9104, "step": 65720 }, { "epoch": 0.5437399181039831, "grad_norm": 879.0147705078125, "learning_rate": 5.328730831398089e-06, "loss": 92.8446, "step": 65730 }, { "epoch": 0.5438226413533523, "grad_norm": 1260.738525390625, "learning_rate": 5.3273235027647764e-06, "loss": 81.406, "step": 65740 }, { "epoch": 0.5439053646027215, "grad_norm": 807.532958984375, "learning_rate": 5.3259161480878354e-06, "loss": 80.0483, "step": 65750 }, { "epoch": 0.5439880878520909, "grad_norm": 628.903076171875, "learning_rate": 5.324508767479239e-06, "loss": 111.094, "step": 65760 }, { "epoch": 0.54407081110146, "grad_norm": 1162.60888671875, "learning_rate": 5.323101361050972e-06, "loss": 102.2003, "step": 65770 }, { "epoch": 0.5441535343508292, "grad_norm": 840.9710693359375, "learning_rate": 5.321693928915012e-06, "loss": 85.5873, "step": 65780 }, { "epoch": 0.5442362576001986, "grad_norm": 588.482666015625, "learning_rate": 5.320286471183343e-06, "loss": 88.2541, "step": 65790 }, { "epoch": 0.5443189808495678, "grad_norm": 826.7291259765625, "learning_rate": 5.3188789879679496e-06, "loss": 113.2529, "step": 65800 }, { "epoch": 0.544401704098937, "grad_norm": 988.3714599609375, "learning_rate": 5.317471479380816e-06, "loss": 92.3975, "step": 65810 }, { "epoch": 0.5444844273483063, "grad_norm": 1229.006103515625, "learning_rate": 5.3160639455339355e-06, "loss": 99.8077, "step": 65820 }, { "epoch": 0.5445671505976755, "grad_norm": 558.47802734375, "learning_rate": 5.314656386539298e-06, "loss": 98.0959, "step": 65830 }, { "epoch": 0.5446498738470447, "grad_norm": 1046.564697265625, "learning_rate": 5.313248802508896e-06, "loss": 94.008, "step": 65840 }, { "epoch": 0.544732597096414, "grad_norm": 943.0609741210938, "learning_rate": 5.311841193554723e-06, "loss": 94.6098, "step": 65850 }, { "epoch": 0.5448153203457832, "grad_norm": 465.5363464355469, "learning_rate": 5.310433559788778e-06, "loss": 100.7524, "step": 65860 }, { "epoch": 0.5448980435951524, "grad_norm": 902.67333984375, "learning_rate": 5.309025901323059e-06, "loss": 67.7871, "step": 65870 }, { "epoch": 0.5449807668445217, "grad_norm": 641.2943725585938, "learning_rate": 5.307618218269569e-06, "loss": 89.4344, "step": 65880 }, { "epoch": 0.5450634900938909, "grad_norm": 730.5675048828125, "learning_rate": 5.306210510740307e-06, "loss": 78.7692, "step": 65890 }, { "epoch": 0.5451462133432601, "grad_norm": 562.7182006835938, "learning_rate": 5.304802778847281e-06, "loss": 62.6107, "step": 65900 }, { "epoch": 0.5452289365926294, "grad_norm": 776.1548461914062, "learning_rate": 5.303395022702495e-06, "loss": 83.2003, "step": 65910 }, { "epoch": 0.5453116598419986, "grad_norm": 716.1808471679688, "learning_rate": 5.301987242417963e-06, "loss": 77.2049, "step": 65920 }, { "epoch": 0.5453943830913678, "grad_norm": 1019.53759765625, "learning_rate": 5.300579438105689e-06, "loss": 108.0206, "step": 65930 }, { "epoch": 0.5454771063407371, "grad_norm": 559.3324584960938, "learning_rate": 5.29917160987769e-06, "loss": 83.0377, "step": 65940 }, { "epoch": 0.5455598295901063, "grad_norm": 785.0572509765625, "learning_rate": 5.297763757845979e-06, "loss": 88.7534, "step": 65950 }, { "epoch": 0.5456425528394755, "grad_norm": 606.4642944335938, "learning_rate": 5.296355882122572e-06, "loss": 82.7635, "step": 65960 }, { "epoch": 0.5457252760888448, "grad_norm": 1158.042236328125, "learning_rate": 5.294947982819488e-06, "loss": 108.4545, "step": 65970 }, { "epoch": 0.545807999338214, "grad_norm": 412.3054504394531, "learning_rate": 5.293540060048746e-06, "loss": 116.0526, "step": 65980 }, { "epoch": 0.5458907225875832, "grad_norm": 661.3358154296875, "learning_rate": 5.292132113922369e-06, "loss": 74.4473, "step": 65990 }, { "epoch": 0.5459734458369525, "grad_norm": 637.5306396484375, "learning_rate": 5.290724144552379e-06, "loss": 104.4344, "step": 66000 }, { "epoch": 0.5460561690863217, "grad_norm": 1194.7322998046875, "learning_rate": 5.2893161520508055e-06, "loss": 93.1337, "step": 66010 }, { "epoch": 0.5461388923356909, "grad_norm": 1273.340576171875, "learning_rate": 5.287908136529671e-06, "loss": 102.8886, "step": 66020 }, { "epoch": 0.5462216155850602, "grad_norm": 923.6551513671875, "learning_rate": 5.28650009810101e-06, "loss": 125.1421, "step": 66030 }, { "epoch": 0.5463043388344294, "grad_norm": 996.6854248046875, "learning_rate": 5.28509203687685e-06, "loss": 93.7317, "step": 66040 }, { "epoch": 0.5463870620837986, "grad_norm": 655.3250122070312, "learning_rate": 5.283683952969224e-06, "loss": 98.5502, "step": 66050 }, { "epoch": 0.5464697853331679, "grad_norm": 724.341064453125, "learning_rate": 5.282275846490169e-06, "loss": 76.2478, "step": 66060 }, { "epoch": 0.5465525085825371, "grad_norm": 665.1072998046875, "learning_rate": 5.280867717551719e-06, "loss": 90.0001, "step": 66070 }, { "epoch": 0.5466352318319063, "grad_norm": 774.87451171875, "learning_rate": 5.279459566265915e-06, "loss": 83.016, "step": 66080 }, { "epoch": 0.5467179550812756, "grad_norm": 1207.277587890625, "learning_rate": 5.278051392744796e-06, "loss": 115.3367, "step": 66090 }, { "epoch": 0.5468006783306448, "grad_norm": 846.2992553710938, "learning_rate": 5.2766431971004025e-06, "loss": 76.7401, "step": 66100 }, { "epoch": 0.546883401580014, "grad_norm": 794.3187866210938, "learning_rate": 5.275234979444781e-06, "loss": 104.4632, "step": 66110 }, { "epoch": 0.5469661248293833, "grad_norm": 609.81787109375, "learning_rate": 5.273826739889975e-06, "loss": 93.3402, "step": 66120 }, { "epoch": 0.5470488480787525, "grad_norm": 818.2113647460938, "learning_rate": 5.272418478548031e-06, "loss": 77.1904, "step": 66130 }, { "epoch": 0.5471315713281217, "grad_norm": 2090.50390625, "learning_rate": 5.271010195530999e-06, "loss": 101.5279, "step": 66140 }, { "epoch": 0.547214294577491, "grad_norm": 1038.06103515625, "learning_rate": 5.26960189095093e-06, "loss": 95.1865, "step": 66150 }, { "epoch": 0.5472970178268602, "grad_norm": 984.3367309570312, "learning_rate": 5.268193564919876e-06, "loss": 91.2734, "step": 66160 }, { "epoch": 0.5473797410762294, "grad_norm": 1194.6197509765625, "learning_rate": 5.26678521754989e-06, "loss": 116.3886, "step": 66170 }, { "epoch": 0.5474624643255988, "grad_norm": 978.132080078125, "learning_rate": 5.265376848953031e-06, "loss": 114.3536, "step": 66180 }, { "epoch": 0.547545187574968, "grad_norm": 1035.0242919921875, "learning_rate": 5.263968459241351e-06, "loss": 89.8195, "step": 66190 }, { "epoch": 0.5476279108243371, "grad_norm": 1031.2353515625, "learning_rate": 5.262560048526913e-06, "loss": 80.5158, "step": 66200 }, { "epoch": 0.5477106340737065, "grad_norm": 308.54156494140625, "learning_rate": 5.261151616921778e-06, "loss": 95.6141, "step": 66210 }, { "epoch": 0.5477933573230757, "grad_norm": 1179.9146728515625, "learning_rate": 5.259743164538008e-06, "loss": 115.8417, "step": 66220 }, { "epoch": 0.5478760805724449, "grad_norm": 1147.329345703125, "learning_rate": 5.2583346914876655e-06, "loss": 81.3131, "step": 66230 }, { "epoch": 0.5479588038218142, "grad_norm": 823.7957153320312, "learning_rate": 5.2569261978828155e-06, "loss": 151.2819, "step": 66240 }, { "epoch": 0.5480415270711834, "grad_norm": 466.2208557128906, "learning_rate": 5.255517683835528e-06, "loss": 84.0521, "step": 66250 }, { "epoch": 0.5481242503205526, "grad_norm": 1068.723876953125, "learning_rate": 5.254109149457873e-06, "loss": 93.1827, "step": 66260 }, { "epoch": 0.5482069735699219, "grad_norm": 1123.1505126953125, "learning_rate": 5.252700594861918e-06, "loss": 107.1272, "step": 66270 }, { "epoch": 0.5482896968192911, "grad_norm": 1002.3335571289062, "learning_rate": 5.251292020159736e-06, "loss": 88.9466, "step": 66280 }, { "epoch": 0.5483724200686603, "grad_norm": 658.9024047851562, "learning_rate": 5.2498834254634005e-06, "loss": 87.6717, "step": 66290 }, { "epoch": 0.5484551433180296, "grad_norm": 1128.9300537109375, "learning_rate": 5.248474810884988e-06, "loss": 105.05, "step": 66300 }, { "epoch": 0.5485378665673988, "grad_norm": 853.0302734375, "learning_rate": 5.247066176536577e-06, "loss": 136.7064, "step": 66310 }, { "epoch": 0.548620589816768, "grad_norm": 1292.0511474609375, "learning_rate": 5.245657522530243e-06, "loss": 88.9545, "step": 66320 }, { "epoch": 0.5487033130661373, "grad_norm": 1117.95947265625, "learning_rate": 5.244248848978067e-06, "loss": 74.3471, "step": 66330 }, { "epoch": 0.5487860363155065, "grad_norm": 1050.1473388671875, "learning_rate": 5.242840155992131e-06, "loss": 104.2194, "step": 66340 }, { "epoch": 0.5488687595648757, "grad_norm": 462.470458984375, "learning_rate": 5.24143144368452e-06, "loss": 78.9187, "step": 66350 }, { "epoch": 0.548951482814245, "grad_norm": 1257.4759521484375, "learning_rate": 5.240022712167315e-06, "loss": 83.4133, "step": 66360 }, { "epoch": 0.5490342060636142, "grad_norm": 1386.3408203125, "learning_rate": 5.2386139615526046e-06, "loss": 116.1773, "step": 66370 }, { "epoch": 0.5491169293129834, "grad_norm": 742.0828247070312, "learning_rate": 5.237205191952477e-06, "loss": 102.3286, "step": 66380 }, { "epoch": 0.5491996525623527, "grad_norm": 1096.52392578125, "learning_rate": 5.235796403479021e-06, "loss": 80.9894, "step": 66390 }, { "epoch": 0.5492823758117219, "grad_norm": 466.41204833984375, "learning_rate": 5.2343875962443255e-06, "loss": 75.4198, "step": 66400 }, { "epoch": 0.5493650990610911, "grad_norm": 1372.77685546875, "learning_rate": 5.2329787703604875e-06, "loss": 86.9037, "step": 66410 }, { "epoch": 0.5494478223104604, "grad_norm": 733.8797607421875, "learning_rate": 5.231569925939596e-06, "loss": 84.0945, "step": 66420 }, { "epoch": 0.5495305455598296, "grad_norm": 815.2667236328125, "learning_rate": 5.230161063093749e-06, "loss": 83.4252, "step": 66430 }, { "epoch": 0.5496132688091988, "grad_norm": 1322.669921875, "learning_rate": 5.228752181935042e-06, "loss": 100.3188, "step": 66440 }, { "epoch": 0.549695992058568, "grad_norm": 1326.2945556640625, "learning_rate": 5.227343282575574e-06, "loss": 90.2418, "step": 66450 }, { "epoch": 0.5497787153079373, "grad_norm": 1287.527099609375, "learning_rate": 5.225934365127445e-06, "loss": 82.3157, "step": 66460 }, { "epoch": 0.5498614385573065, "grad_norm": 564.8834228515625, "learning_rate": 5.224525429702755e-06, "loss": 67.5519, "step": 66470 }, { "epoch": 0.5499441618066757, "grad_norm": 1590.8048095703125, "learning_rate": 5.223116476413606e-06, "loss": 145.7727, "step": 66480 }, { "epoch": 0.550026885056045, "grad_norm": 424.3537292480469, "learning_rate": 5.221707505372105e-06, "loss": 89.2432, "step": 66490 }, { "epoch": 0.5501096083054142, "grad_norm": 562.13330078125, "learning_rate": 5.220298516690353e-06, "loss": 91.3321, "step": 66500 }, { "epoch": 0.5501923315547834, "grad_norm": 416.3981018066406, "learning_rate": 5.21888951048046e-06, "loss": 114.0917, "step": 66510 }, { "epoch": 0.5502750548041527, "grad_norm": 935.5563354492188, "learning_rate": 5.217480486854534e-06, "loss": 112.0527, "step": 66520 }, { "epoch": 0.5503577780535219, "grad_norm": 448.1026916503906, "learning_rate": 5.216071445924683e-06, "loss": 71.1966, "step": 66530 }, { "epoch": 0.5504405013028911, "grad_norm": 715.922119140625, "learning_rate": 5.214662387803019e-06, "loss": 69.2816, "step": 66540 }, { "epoch": 0.5505232245522604, "grad_norm": 816.540771484375, "learning_rate": 5.213253312601654e-06, "loss": 115.1411, "step": 66550 }, { "epoch": 0.5506059478016296, "grad_norm": 613.8909912109375, "learning_rate": 5.211844220432702e-06, "loss": 74.3888, "step": 66560 }, { "epoch": 0.5506886710509988, "grad_norm": 1052.91064453125, "learning_rate": 5.210435111408276e-06, "loss": 91.0357, "step": 66570 }, { "epoch": 0.5507713943003681, "grad_norm": 1757.2353515625, "learning_rate": 5.209025985640496e-06, "loss": 101.9064, "step": 66580 }, { "epoch": 0.5508541175497373, "grad_norm": 1139.6336669921875, "learning_rate": 5.207616843241476e-06, "loss": 102.2799, "step": 66590 }, { "epoch": 0.5509368407991065, "grad_norm": 1313.9873046875, "learning_rate": 5.206207684323337e-06, "loss": 93.977, "step": 66600 }, { "epoch": 0.5510195640484759, "grad_norm": 574.5587158203125, "learning_rate": 5.2047985089982e-06, "loss": 88.6549, "step": 66610 }, { "epoch": 0.551102287297845, "grad_norm": 881.9164428710938, "learning_rate": 5.203389317378183e-06, "loss": 108.2257, "step": 66620 }, { "epoch": 0.5511850105472142, "grad_norm": 702.6726684570312, "learning_rate": 5.201980109575414e-06, "loss": 102.8949, "step": 66630 }, { "epoch": 0.5512677337965836, "grad_norm": 975.09375, "learning_rate": 5.200570885702013e-06, "loss": 95.4903, "step": 66640 }, { "epoch": 0.5513504570459528, "grad_norm": 502.3410339355469, "learning_rate": 5.19916164587011e-06, "loss": 116.9168, "step": 66650 }, { "epoch": 0.551433180295322, "grad_norm": 2496.73193359375, "learning_rate": 5.197752390191827e-06, "loss": 95.1221, "step": 66660 }, { "epoch": 0.5515159035446913, "grad_norm": 1161.61572265625, "learning_rate": 5.196343118779292e-06, "loss": 103.2815, "step": 66670 }, { "epoch": 0.5515986267940605, "grad_norm": 501.6972961425781, "learning_rate": 5.194933831744637e-06, "loss": 89.6646, "step": 66680 }, { "epoch": 0.5516813500434297, "grad_norm": 808.2440795898438, "learning_rate": 5.1935245291999945e-06, "loss": 132.3228, "step": 66690 }, { "epoch": 0.551764073292799, "grad_norm": 602.8800048828125, "learning_rate": 5.192115211257491e-06, "loss": 100.7651, "step": 66700 }, { "epoch": 0.5518467965421682, "grad_norm": 705.0828247070312, "learning_rate": 5.19070587802926e-06, "loss": 94.9037, "step": 66710 }, { "epoch": 0.5519295197915374, "grad_norm": 685.058349609375, "learning_rate": 5.189296529627441e-06, "loss": 101.691, "step": 66720 }, { "epoch": 0.5520122430409067, "grad_norm": 765.4905395507812, "learning_rate": 5.187887166164165e-06, "loss": 89.7552, "step": 66730 }, { "epoch": 0.5520949662902759, "grad_norm": 695.8983154296875, "learning_rate": 5.186477787751569e-06, "loss": 77.7547, "step": 66740 }, { "epoch": 0.5521776895396451, "grad_norm": 633.330810546875, "learning_rate": 5.185068394501791e-06, "loss": 93.5725, "step": 66750 }, { "epoch": 0.5522604127890144, "grad_norm": 566.3089599609375, "learning_rate": 5.183658986526969e-06, "loss": 86.7824, "step": 66760 }, { "epoch": 0.5523431360383836, "grad_norm": 870.3569946289062, "learning_rate": 5.1822495639392465e-06, "loss": 137.4197, "step": 66770 }, { "epoch": 0.5524258592877528, "grad_norm": 1036.458251953125, "learning_rate": 5.180840126850764e-06, "loss": 80.6906, "step": 66780 }, { "epoch": 0.5525085825371221, "grad_norm": 792.98779296875, "learning_rate": 5.179430675373659e-06, "loss": 91.3037, "step": 66790 }, { "epoch": 0.5525913057864913, "grad_norm": 1176.4154052734375, "learning_rate": 5.17802120962008e-06, "loss": 129.9155, "step": 66800 }, { "epoch": 0.5526740290358605, "grad_norm": 1051.582275390625, "learning_rate": 5.17661172970217e-06, "loss": 73.2745, "step": 66810 }, { "epoch": 0.5527567522852298, "grad_norm": 1047.6094970703125, "learning_rate": 5.175202235732077e-06, "loss": 99.0051, "step": 66820 }, { "epoch": 0.552839475534599, "grad_norm": 871.4730834960938, "learning_rate": 5.1737927278219446e-06, "loss": 86.3209, "step": 66830 }, { "epoch": 0.5529221987839682, "grad_norm": 644.1160888671875, "learning_rate": 5.1723832060839216e-06, "loss": 90.1693, "step": 66840 }, { "epoch": 0.5530049220333375, "grad_norm": 688.15576171875, "learning_rate": 5.170973670630159e-06, "loss": 126.0678, "step": 66850 }, { "epoch": 0.5530876452827067, "grad_norm": 737.1720581054688, "learning_rate": 5.169564121572806e-06, "loss": 83.4195, "step": 66860 }, { "epoch": 0.5531703685320759, "grad_norm": 719.3773803710938, "learning_rate": 5.168154559024014e-06, "loss": 91.4589, "step": 66870 }, { "epoch": 0.5532530917814452, "grad_norm": 1081.61572265625, "learning_rate": 5.166744983095937e-06, "loss": 115.1463, "step": 66880 }, { "epoch": 0.5533358150308144, "grad_norm": 863.379150390625, "learning_rate": 5.165335393900726e-06, "loss": 91.273, "step": 66890 }, { "epoch": 0.5534185382801836, "grad_norm": 1196.47509765625, "learning_rate": 5.163925791550536e-06, "loss": 74.5763, "step": 66900 }, { "epoch": 0.5535012615295529, "grad_norm": 754.5050659179688, "learning_rate": 5.162516176157523e-06, "loss": 78.6246, "step": 66910 }, { "epoch": 0.5535839847789221, "grad_norm": 332.3321228027344, "learning_rate": 5.161106547833843e-06, "loss": 78.1392, "step": 66920 }, { "epoch": 0.5536667080282913, "grad_norm": 459.50701904296875, "learning_rate": 5.159696906691656e-06, "loss": 78.0986, "step": 66930 }, { "epoch": 0.5537494312776606, "grad_norm": 1238.329833984375, "learning_rate": 5.158287252843118e-06, "loss": 105.3073, "step": 66940 }, { "epoch": 0.5538321545270298, "grad_norm": 885.58251953125, "learning_rate": 5.1568775864003894e-06, "loss": 102.8519, "step": 66950 }, { "epoch": 0.553914877776399, "grad_norm": 634.6365966796875, "learning_rate": 5.155467907475632e-06, "loss": 114.2997, "step": 66960 }, { "epoch": 0.5539976010257683, "grad_norm": 957.9181518554688, "learning_rate": 5.154058216181007e-06, "loss": 86.0915, "step": 66970 }, { "epoch": 0.5540803242751375, "grad_norm": 1111.42919921875, "learning_rate": 5.1526485126286766e-06, "loss": 98.9156, "step": 66980 }, { "epoch": 0.5541630475245067, "grad_norm": 1166.4410400390625, "learning_rate": 5.151238796930804e-06, "loss": 125.5735, "step": 66990 }, { "epoch": 0.554245770773876, "grad_norm": 650.7413330078125, "learning_rate": 5.149829069199555e-06, "loss": 83.7539, "step": 67000 }, { "epoch": 0.5543284940232452, "grad_norm": 654.6986694335938, "learning_rate": 5.148419329547094e-06, "loss": 85.525, "step": 67010 }, { "epoch": 0.5544112172726144, "grad_norm": 713.3673706054688, "learning_rate": 5.147009578085589e-06, "loss": 96.1334, "step": 67020 }, { "epoch": 0.5544939405219838, "grad_norm": 1134.079345703125, "learning_rate": 5.145599814927205e-06, "loss": 106.5868, "step": 67030 }, { "epoch": 0.554576663771353, "grad_norm": 782.4326171875, "learning_rate": 5.144190040184114e-06, "loss": 81.4991, "step": 67040 }, { "epoch": 0.5546593870207221, "grad_norm": 885.5360107421875, "learning_rate": 5.142780253968481e-06, "loss": 106.0122, "step": 67050 }, { "epoch": 0.5547421102700915, "grad_norm": 761.8356323242188, "learning_rate": 5.14137045639248e-06, "loss": 106.5318, "step": 67060 }, { "epoch": 0.5548248335194607, "grad_norm": 796.8743896484375, "learning_rate": 5.13996064756828e-06, "loss": 70.0908, "step": 67070 }, { "epoch": 0.5549075567688299, "grad_norm": 1602.5904541015625, "learning_rate": 5.138550827608055e-06, "loss": 97.1062, "step": 67080 }, { "epoch": 0.5549902800181992, "grad_norm": 1278.1387939453125, "learning_rate": 5.137140996623975e-06, "loss": 85.0867, "step": 67090 }, { "epoch": 0.5550730032675684, "grad_norm": 1092.7744140625, "learning_rate": 5.135731154728215e-06, "loss": 95.7226, "step": 67100 }, { "epoch": 0.5551557265169376, "grad_norm": 897.9187622070312, "learning_rate": 5.134321302032951e-06, "loss": 118.3043, "step": 67110 }, { "epoch": 0.5552384497663069, "grad_norm": 1308.584716796875, "learning_rate": 5.1329114386503585e-06, "loss": 136.3629, "step": 67120 }, { "epoch": 0.5553211730156761, "grad_norm": 1024.6846923828125, "learning_rate": 5.131501564692611e-06, "loss": 121.1511, "step": 67130 }, { "epoch": 0.5554038962650453, "grad_norm": 894.513671875, "learning_rate": 5.130091680271887e-06, "loss": 98.0504, "step": 67140 }, { "epoch": 0.5554866195144146, "grad_norm": 1355.4970703125, "learning_rate": 5.128681785500365e-06, "loss": 91.5048, "step": 67150 }, { "epoch": 0.5555693427637838, "grad_norm": 660.9675903320312, "learning_rate": 5.127271880490227e-06, "loss": 107.9526, "step": 67160 }, { "epoch": 0.555652066013153, "grad_norm": 639.190185546875, "learning_rate": 5.125861965353647e-06, "loss": 95.762, "step": 67170 }, { "epoch": 0.5557347892625222, "grad_norm": 1066.5399169921875, "learning_rate": 5.124452040202809e-06, "loss": 72.5684, "step": 67180 }, { "epoch": 0.5558175125118915, "grad_norm": 1595.59033203125, "learning_rate": 5.1230421051498914e-06, "loss": 101.4106, "step": 67190 }, { "epoch": 0.5559002357612607, "grad_norm": 700.05419921875, "learning_rate": 5.121632160307078e-06, "loss": 110.3626, "step": 67200 }, { "epoch": 0.5559829590106299, "grad_norm": 1288.469482421875, "learning_rate": 5.120222205786556e-06, "loss": 134.359, "step": 67210 }, { "epoch": 0.5560656822599992, "grad_norm": 1454.6519775390625, "learning_rate": 5.118812241700501e-06, "loss": 92.2554, "step": 67220 }, { "epoch": 0.5561484055093684, "grad_norm": 937.5780029296875, "learning_rate": 5.117402268161103e-06, "loss": 78.8586, "step": 67230 }, { "epoch": 0.5562311287587376, "grad_norm": 1452.914306640625, "learning_rate": 5.115992285280543e-06, "loss": 90.6466, "step": 67240 }, { "epoch": 0.5563138520081069, "grad_norm": 808.0802612304688, "learning_rate": 5.114582293171012e-06, "loss": 84.3542, "step": 67250 }, { "epoch": 0.5563965752574761, "grad_norm": 1291.392333984375, "learning_rate": 5.113172291944693e-06, "loss": 80.4222, "step": 67260 }, { "epoch": 0.5564792985068453, "grad_norm": 0.0, "learning_rate": 5.111762281713773e-06, "loss": 96.9373, "step": 67270 }, { "epoch": 0.5565620217562146, "grad_norm": 460.95623779296875, "learning_rate": 5.110352262590442e-06, "loss": 98.4892, "step": 67280 }, { "epoch": 0.5566447450055838, "grad_norm": 921.4959106445312, "learning_rate": 5.108942234686889e-06, "loss": 85.5142, "step": 67290 }, { "epoch": 0.556727468254953, "grad_norm": 909.4285278320312, "learning_rate": 5.1075321981153014e-06, "loss": 79.041, "step": 67300 }, { "epoch": 0.5568101915043223, "grad_norm": 786.9864501953125, "learning_rate": 5.106122152987869e-06, "loss": 131.3443, "step": 67310 }, { "epoch": 0.5568929147536915, "grad_norm": 837.8712158203125, "learning_rate": 5.1047120994167855e-06, "loss": 111.7526, "step": 67320 }, { "epoch": 0.5569756380030607, "grad_norm": 748.6630859375, "learning_rate": 5.103302037514241e-06, "loss": 83.1395, "step": 67330 }, { "epoch": 0.55705836125243, "grad_norm": 422.8838195800781, "learning_rate": 5.101891967392426e-06, "loss": 132.004, "step": 67340 }, { "epoch": 0.5571410845017992, "grad_norm": 784.1287231445312, "learning_rate": 5.100481889163535e-06, "loss": 84.0244, "step": 67350 }, { "epoch": 0.5572238077511684, "grad_norm": 871.6853637695312, "learning_rate": 5.099071802939763e-06, "loss": 96.8194, "step": 67360 }, { "epoch": 0.5573065310005377, "grad_norm": 1200.704833984375, "learning_rate": 5.097661708833302e-06, "loss": 101.7852, "step": 67370 }, { "epoch": 0.5573892542499069, "grad_norm": 605.1507568359375, "learning_rate": 5.096251606956345e-06, "loss": 114.6154, "step": 67380 }, { "epoch": 0.5574719774992761, "grad_norm": 1629.7506103515625, "learning_rate": 5.0948414974210906e-06, "loss": 108.8764, "step": 67390 }, { "epoch": 0.5575547007486454, "grad_norm": 963.1193237304688, "learning_rate": 5.093431380339734e-06, "loss": 83.0829, "step": 67400 }, { "epoch": 0.5576374239980146, "grad_norm": 984.2836303710938, "learning_rate": 5.092021255824471e-06, "loss": 90.3172, "step": 67410 }, { "epoch": 0.5577201472473838, "grad_norm": 846.0245971679688, "learning_rate": 5.090611123987498e-06, "loss": 75.3123, "step": 67420 }, { "epoch": 0.5578028704967531, "grad_norm": 880.2362670898438, "learning_rate": 5.089200984941014e-06, "loss": 86.5572, "step": 67430 }, { "epoch": 0.5578855937461223, "grad_norm": 1226.6094970703125, "learning_rate": 5.087790838797217e-06, "loss": 76.6975, "step": 67440 }, { "epoch": 0.5579683169954915, "grad_norm": 1174.342529296875, "learning_rate": 5.0863806856683076e-06, "loss": 97.9613, "step": 67450 }, { "epoch": 0.5580510402448609, "grad_norm": 832.6563110351562, "learning_rate": 5.084970525666481e-06, "loss": 77.5357, "step": 67460 }, { "epoch": 0.55813376349423, "grad_norm": 1207.7735595703125, "learning_rate": 5.083560358903942e-06, "loss": 108.6904, "step": 67470 }, { "epoch": 0.5582164867435992, "grad_norm": 1580.60791015625, "learning_rate": 5.082150185492887e-06, "loss": 123.0784, "step": 67480 }, { "epoch": 0.5582992099929686, "grad_norm": 911.670654296875, "learning_rate": 5.080740005545519e-06, "loss": 81.2963, "step": 67490 }, { "epoch": 0.5583819332423378, "grad_norm": 1346.0203857421875, "learning_rate": 5.07932981917404e-06, "loss": 105.0922, "step": 67500 }, { "epoch": 0.558464656491707, "grad_norm": 1385.7469482421875, "learning_rate": 5.077919626490651e-06, "loss": 111.0049, "step": 67510 }, { "epoch": 0.5585473797410763, "grad_norm": 2228.890380859375, "learning_rate": 5.076509427607555e-06, "loss": 89.5055, "step": 67520 }, { "epoch": 0.5586301029904455, "grad_norm": 507.3185119628906, "learning_rate": 5.075099222636954e-06, "loss": 78.911, "step": 67530 }, { "epoch": 0.5587128262398147, "grad_norm": 551.4053955078125, "learning_rate": 5.073689011691054e-06, "loss": 150.896, "step": 67540 }, { "epoch": 0.558795549489184, "grad_norm": 350.7704162597656, "learning_rate": 5.072278794882058e-06, "loss": 78.5772, "step": 67550 }, { "epoch": 0.5588782727385532, "grad_norm": 682.6192626953125, "learning_rate": 5.07086857232217e-06, "loss": 93.3245, "step": 67560 }, { "epoch": 0.5589609959879224, "grad_norm": 699.5905151367188, "learning_rate": 5.069458344123592e-06, "loss": 109.2068, "step": 67570 }, { "epoch": 0.5590437192372917, "grad_norm": 1194.6055908203125, "learning_rate": 5.068048110398535e-06, "loss": 103.4686, "step": 67580 }, { "epoch": 0.5591264424866609, "grad_norm": 671.2708740234375, "learning_rate": 5.066637871259201e-06, "loss": 69.1066, "step": 67590 }, { "epoch": 0.5592091657360301, "grad_norm": 817.7105102539062, "learning_rate": 5.065227626817798e-06, "loss": 105.476, "step": 67600 }, { "epoch": 0.5592918889853994, "grad_norm": 664.9489135742188, "learning_rate": 5.063817377186531e-06, "loss": 67.6632, "step": 67610 }, { "epoch": 0.5593746122347686, "grad_norm": 1241.629638671875, "learning_rate": 5.062407122477609e-06, "loss": 87.1826, "step": 67620 }, { "epoch": 0.5594573354841378, "grad_norm": 1491.961181640625, "learning_rate": 5.060996862803239e-06, "loss": 78.7851, "step": 67630 }, { "epoch": 0.5595400587335071, "grad_norm": 981.352294921875, "learning_rate": 5.0595865982756284e-06, "loss": 106.6009, "step": 67640 }, { "epoch": 0.5596227819828763, "grad_norm": 695.9772338867188, "learning_rate": 5.0581763290069865e-06, "loss": 85.9514, "step": 67650 }, { "epoch": 0.5597055052322455, "grad_norm": 1229.713134765625, "learning_rate": 5.05676605510952e-06, "loss": 105.0807, "step": 67660 }, { "epoch": 0.5597882284816148, "grad_norm": 534.1005859375, "learning_rate": 5.055355776695437e-06, "loss": 93.8244, "step": 67670 }, { "epoch": 0.559870951730984, "grad_norm": 911.0359497070312, "learning_rate": 5.0539454938769525e-06, "loss": 101.3288, "step": 67680 }, { "epoch": 0.5599536749803532, "grad_norm": 918.2195434570312, "learning_rate": 5.052535206766271e-06, "loss": 89.4984, "step": 67690 }, { "epoch": 0.5600363982297225, "grad_norm": 1059.2137451171875, "learning_rate": 5.051124915475604e-06, "loss": 99.4896, "step": 67700 }, { "epoch": 0.5601191214790917, "grad_norm": 1157.1102294921875, "learning_rate": 5.049714620117162e-06, "loss": 105.2878, "step": 67710 }, { "epoch": 0.5602018447284609, "grad_norm": 666.695068359375, "learning_rate": 5.0483043208031575e-06, "loss": 109.8217, "step": 67720 }, { "epoch": 0.5602845679778302, "grad_norm": 1238.229736328125, "learning_rate": 5.0468940176458e-06, "loss": 94.0387, "step": 67730 }, { "epoch": 0.5603672912271994, "grad_norm": 2138.0751953125, "learning_rate": 5.045483710757298e-06, "loss": 95.2673, "step": 67740 }, { "epoch": 0.5604500144765686, "grad_norm": 914.434326171875, "learning_rate": 5.044073400249867e-06, "loss": 92.1393, "step": 67750 }, { "epoch": 0.5605327377259379, "grad_norm": 1934.4398193359375, "learning_rate": 5.0426630862357176e-06, "loss": 142.6514, "step": 67760 }, { "epoch": 0.5606154609753071, "grad_norm": 1084.5147705078125, "learning_rate": 5.041252768827064e-06, "loss": 133.2949, "step": 67770 }, { "epoch": 0.5606981842246763, "grad_norm": 748.2398071289062, "learning_rate": 5.039842448136115e-06, "loss": 69.7304, "step": 67780 }, { "epoch": 0.5607809074740456, "grad_norm": 804.4089965820312, "learning_rate": 5.038432124275087e-06, "loss": 88.9984, "step": 67790 }, { "epoch": 0.5608636307234148, "grad_norm": 535.5092163085938, "learning_rate": 5.03702179735619e-06, "loss": 88.9917, "step": 67800 }, { "epoch": 0.560946353972784, "grad_norm": 635.5907592773438, "learning_rate": 5.035611467491638e-06, "loss": 92.2593, "step": 67810 }, { "epoch": 0.5610290772221533, "grad_norm": 851.3953247070312, "learning_rate": 5.034201134793646e-06, "loss": 126.7755, "step": 67820 }, { "epoch": 0.5611118004715225, "grad_norm": 780.0504760742188, "learning_rate": 5.032790799374426e-06, "loss": 131.121, "step": 67830 }, { "epoch": 0.5611945237208917, "grad_norm": 1291.3529052734375, "learning_rate": 5.0313804613461925e-06, "loss": 110.4924, "step": 67840 }, { "epoch": 0.561277246970261, "grad_norm": 860.24072265625, "learning_rate": 5.0299701208211605e-06, "loss": 83.46, "step": 67850 }, { "epoch": 0.5613599702196302, "grad_norm": 1199.1927490234375, "learning_rate": 5.028559777911543e-06, "loss": 107.3534, "step": 67860 }, { "epoch": 0.5614426934689994, "grad_norm": 613.6046752929688, "learning_rate": 5.027149432729555e-06, "loss": 88.3334, "step": 67870 }, { "epoch": 0.5615254167183688, "grad_norm": 1034.6842041015625, "learning_rate": 5.025739085387411e-06, "loss": 106.7181, "step": 67880 }, { "epoch": 0.561608139967738, "grad_norm": 968.4409790039062, "learning_rate": 5.024328735997327e-06, "loss": 73.4767, "step": 67890 }, { "epoch": 0.5616908632171072, "grad_norm": 636.9981689453125, "learning_rate": 5.0229183846715154e-06, "loss": 80.4445, "step": 67900 }, { "epoch": 0.5617735864664763, "grad_norm": 762.04931640625, "learning_rate": 5.021508031522195e-06, "loss": 106.2328, "step": 67910 }, { "epoch": 0.5618563097158457, "grad_norm": 1687.2122802734375, "learning_rate": 5.0200976766615785e-06, "loss": 103.8904, "step": 67920 }, { "epoch": 0.5619390329652149, "grad_norm": 1144.8087158203125, "learning_rate": 5.018687320201882e-06, "loss": 88.8534, "step": 67930 }, { "epoch": 0.5620217562145841, "grad_norm": 1060.1658935546875, "learning_rate": 5.017276962255323e-06, "loss": 117.6395, "step": 67940 }, { "epoch": 0.5621044794639534, "grad_norm": 541.5599975585938, "learning_rate": 5.015866602934112e-06, "loss": 81.2837, "step": 67950 }, { "epoch": 0.5621872027133226, "grad_norm": 1171.819091796875, "learning_rate": 5.01445624235047e-06, "loss": 122.8472, "step": 67960 }, { "epoch": 0.5622699259626918, "grad_norm": 569.9471435546875, "learning_rate": 5.013045880616612e-06, "loss": 99.2828, "step": 67970 }, { "epoch": 0.5623526492120611, "grad_norm": 857.0740356445312, "learning_rate": 5.011635517844753e-06, "loss": 79.8719, "step": 67980 }, { "epoch": 0.5624353724614303, "grad_norm": 1649.099853515625, "learning_rate": 5.010225154147107e-06, "loss": 116.2859, "step": 67990 }, { "epoch": 0.5625180957107995, "grad_norm": 991.4071044921875, "learning_rate": 5.008814789635894e-06, "loss": 84.7862, "step": 68000 }, { "epoch": 0.5626008189601688, "grad_norm": 752.6597900390625, "learning_rate": 5.007404424423329e-06, "loss": 98.8501, "step": 68010 }, { "epoch": 0.562683542209538, "grad_norm": 720.8966064453125, "learning_rate": 5.0059940586216284e-06, "loss": 105.6753, "step": 68020 }, { "epoch": 0.5627662654589072, "grad_norm": 720.338134765625, "learning_rate": 5.004583692343007e-06, "loss": 104.9102, "step": 68030 }, { "epoch": 0.5628489887082765, "grad_norm": 3147.158447265625, "learning_rate": 5.003173325699682e-06, "loss": 115.8981, "step": 68040 }, { "epoch": 0.5629317119576457, "grad_norm": 649.7725830078125, "learning_rate": 5.00176295880387e-06, "loss": 99.7788, "step": 68050 }, { "epoch": 0.5630144352070149, "grad_norm": 523.4342651367188, "learning_rate": 5.000352591767787e-06, "loss": 82.1472, "step": 68060 }, { "epoch": 0.5630971584563842, "grad_norm": 704.3622436523438, "learning_rate": 4.998942224703651e-06, "loss": 95.6916, "step": 68070 }, { "epoch": 0.5631798817057534, "grad_norm": 487.66796875, "learning_rate": 4.997531857723678e-06, "loss": 77.8491, "step": 68080 }, { "epoch": 0.5632626049551226, "grad_norm": 959.2645874023438, "learning_rate": 4.996121490940084e-06, "loss": 106.0061, "step": 68090 }, { "epoch": 0.5633453282044919, "grad_norm": 1130.561279296875, "learning_rate": 4.994711124465084e-06, "loss": 86.5281, "step": 68100 }, { "epoch": 0.5634280514538611, "grad_norm": 675.5780639648438, "learning_rate": 4.993300758410895e-06, "loss": 110.0674, "step": 68110 }, { "epoch": 0.5635107747032303, "grad_norm": 833.6633911132812, "learning_rate": 4.991890392889735e-06, "loss": 93.0616, "step": 68120 }, { "epoch": 0.5635934979525996, "grad_norm": 497.2891845703125, "learning_rate": 4.990480028013818e-06, "loss": 102.913, "step": 68130 }, { "epoch": 0.5636762212019688, "grad_norm": 976.9085693359375, "learning_rate": 4.989069663895361e-06, "loss": 94.7653, "step": 68140 }, { "epoch": 0.563758944451338, "grad_norm": 1526.7978515625, "learning_rate": 4.9876593006465825e-06, "loss": 74.5033, "step": 68150 }, { "epoch": 0.5638416677007073, "grad_norm": 903.107177734375, "learning_rate": 4.986248938379696e-06, "loss": 80.8739, "step": 68160 }, { "epoch": 0.5639243909500765, "grad_norm": 1482.0433349609375, "learning_rate": 4.984838577206921e-06, "loss": 106.9815, "step": 68170 }, { "epoch": 0.5640071141994457, "grad_norm": 4656.763671875, "learning_rate": 4.9834282172404665e-06, "loss": 115.4343, "step": 68180 }, { "epoch": 0.564089837448815, "grad_norm": 661.7854614257812, "learning_rate": 4.982017858592555e-06, "loss": 98.6977, "step": 68190 }, { "epoch": 0.5641725606981842, "grad_norm": 654.1882934570312, "learning_rate": 4.980607501375399e-06, "loss": 113.0739, "step": 68200 }, { "epoch": 0.5642552839475534, "grad_norm": 1355.635498046875, "learning_rate": 4.979197145701216e-06, "loss": 125.4591, "step": 68210 }, { "epoch": 0.5643380071969227, "grad_norm": 793.7406005859375, "learning_rate": 4.977786791682221e-06, "loss": 94.5731, "step": 68220 }, { "epoch": 0.5644207304462919, "grad_norm": 819.9609985351562, "learning_rate": 4.976376439430627e-06, "loss": 94.1121, "step": 68230 }, { "epoch": 0.5645034536956611, "grad_norm": 687.6693725585938, "learning_rate": 4.974966089058652e-06, "loss": 91.6705, "step": 68240 }, { "epoch": 0.5645861769450304, "grad_norm": 645.421875, "learning_rate": 4.973555740678512e-06, "loss": 115.3419, "step": 68250 }, { "epoch": 0.5646689001943996, "grad_norm": 675.6929931640625, "learning_rate": 4.972145394402421e-06, "loss": 83.4908, "step": 68260 }, { "epoch": 0.5647516234437688, "grad_norm": 1000.2984008789062, "learning_rate": 4.9707350503425905e-06, "loss": 93.1049, "step": 68270 }, { "epoch": 0.5648343466931381, "grad_norm": 996.8165283203125, "learning_rate": 4.969324708611239e-06, "loss": 87.6225, "step": 68280 }, { "epoch": 0.5649170699425073, "grad_norm": 605.3995361328125, "learning_rate": 4.9679143693205785e-06, "loss": 92.8545, "step": 68290 }, { "epoch": 0.5649997931918765, "grad_norm": 1098.21044921875, "learning_rate": 4.966504032582826e-06, "loss": 129.6778, "step": 68300 }, { "epoch": 0.5650825164412459, "grad_norm": 822.2824096679688, "learning_rate": 4.965093698510192e-06, "loss": 96.9237, "step": 68310 }, { "epoch": 0.565165239690615, "grad_norm": 818.29931640625, "learning_rate": 4.963683367214895e-06, "loss": 88.808, "step": 68320 }, { "epoch": 0.5652479629399842, "grad_norm": 1037.8428955078125, "learning_rate": 4.962273038809143e-06, "loss": 108.7637, "step": 68330 }, { "epoch": 0.5653306861893536, "grad_norm": 614.8590698242188, "learning_rate": 4.960862713405153e-06, "loss": 89.6115, "step": 68340 }, { "epoch": 0.5654134094387228, "grad_norm": 389.6104736328125, "learning_rate": 4.95945239111514e-06, "loss": 93.0896, "step": 68350 }, { "epoch": 0.565496132688092, "grad_norm": 1135.781005859375, "learning_rate": 4.9580420720513115e-06, "loss": 151.4895, "step": 68360 }, { "epoch": 0.5655788559374613, "grad_norm": 816.8753051757812, "learning_rate": 4.956631756325882e-06, "loss": 98.8892, "step": 68370 }, { "epoch": 0.5656615791868305, "grad_norm": 782.5609741210938, "learning_rate": 4.955221444051066e-06, "loss": 77.5715, "step": 68380 }, { "epoch": 0.5657443024361997, "grad_norm": 545.4894409179688, "learning_rate": 4.953811135339073e-06, "loss": 98.956, "step": 68390 }, { "epoch": 0.565827025685569, "grad_norm": 789.0388793945312, "learning_rate": 4.952400830302117e-06, "loss": 128.0591, "step": 68400 }, { "epoch": 0.5659097489349382, "grad_norm": 688.3363037109375, "learning_rate": 4.950990529052409e-06, "loss": 79.7653, "step": 68410 }, { "epoch": 0.5659924721843074, "grad_norm": 1568.041259765625, "learning_rate": 4.949580231702158e-06, "loss": 100.4565, "step": 68420 }, { "epoch": 0.5660751954336767, "grad_norm": 828.0899658203125, "learning_rate": 4.94816993836358e-06, "loss": 102.3955, "step": 68430 }, { "epoch": 0.5661579186830459, "grad_norm": 532.2860107421875, "learning_rate": 4.946759649148879e-06, "loss": 77.4463, "step": 68440 }, { "epoch": 0.5662406419324151, "grad_norm": 925.8582763671875, "learning_rate": 4.945349364170269e-06, "loss": 89.3156, "step": 68450 }, { "epoch": 0.5663233651817844, "grad_norm": 2182.206787109375, "learning_rate": 4.94393908353996e-06, "loss": 113.8303, "step": 68460 }, { "epoch": 0.5664060884311536, "grad_norm": 785.1996459960938, "learning_rate": 4.942528807370158e-06, "loss": 98.9256, "step": 68470 }, { "epoch": 0.5664888116805228, "grad_norm": 636.3436279296875, "learning_rate": 4.941118535773078e-06, "loss": 89.0144, "step": 68480 }, { "epoch": 0.5665715349298921, "grad_norm": 1138.821044921875, "learning_rate": 4.9397082688609245e-06, "loss": 65.4779, "step": 68490 }, { "epoch": 0.5666542581792613, "grad_norm": 704.7152099609375, "learning_rate": 4.938298006745909e-06, "loss": 97.2847, "step": 68500 }, { "epoch": 0.5667369814286305, "grad_norm": 1183.618896484375, "learning_rate": 4.936887749540236e-06, "loss": 85.178, "step": 68510 }, { "epoch": 0.5668197046779998, "grad_norm": 960.0731201171875, "learning_rate": 4.935477497356118e-06, "loss": 87.2744, "step": 68520 }, { "epoch": 0.566902427927369, "grad_norm": 1119.224853515625, "learning_rate": 4.934067250305757e-06, "loss": 104.8437, "step": 68530 }, { "epoch": 0.5669851511767382, "grad_norm": 606.4401245117188, "learning_rate": 4.932657008501362e-06, "loss": 114.7352, "step": 68540 }, { "epoch": 0.5670678744261075, "grad_norm": 829.0963745117188, "learning_rate": 4.931246772055141e-06, "loss": 112.2022, "step": 68550 }, { "epoch": 0.5671505976754767, "grad_norm": 1182.5452880859375, "learning_rate": 4.9298365410792985e-06, "loss": 118.4558, "step": 68560 }, { "epoch": 0.5672333209248459, "grad_norm": 873.345947265625, "learning_rate": 4.928426315686039e-06, "loss": 66.0995, "step": 68570 }, { "epoch": 0.5673160441742152, "grad_norm": 1000.1868896484375, "learning_rate": 4.92701609598757e-06, "loss": 99.3208, "step": 68580 }, { "epoch": 0.5673987674235844, "grad_norm": 782.4760131835938, "learning_rate": 4.925605882096096e-06, "loss": 91.0728, "step": 68590 }, { "epoch": 0.5674814906729536, "grad_norm": 884.9342651367188, "learning_rate": 4.924195674123821e-06, "loss": 89.9195, "step": 68600 }, { "epoch": 0.5675642139223229, "grad_norm": 813.310546875, "learning_rate": 4.922785472182948e-06, "loss": 72.9367, "step": 68610 }, { "epoch": 0.5676469371716921, "grad_norm": 566.2291259765625, "learning_rate": 4.92137527638568e-06, "loss": 67.9247, "step": 68620 }, { "epoch": 0.5677296604210613, "grad_norm": 667.0711059570312, "learning_rate": 4.919965086844221e-06, "loss": 102.4399, "step": 68630 }, { "epoch": 0.5678123836704305, "grad_norm": 680.4052124023438, "learning_rate": 4.9185549036707715e-06, "loss": 74.7348, "step": 68640 }, { "epoch": 0.5678951069197998, "grad_norm": 801.6771240234375, "learning_rate": 4.917144726977535e-06, "loss": 124.5145, "step": 68650 }, { "epoch": 0.567977830169169, "grad_norm": 318.88824462890625, "learning_rate": 4.915734556876713e-06, "loss": 72.6641, "step": 68660 }, { "epoch": 0.5680605534185382, "grad_norm": 822.7666625976562, "learning_rate": 4.914324393480504e-06, "loss": 89.2944, "step": 68670 }, { "epoch": 0.5681432766679075, "grad_norm": 956.2263793945312, "learning_rate": 4.9129142369011105e-06, "loss": 106.6015, "step": 68680 }, { "epoch": 0.5682259999172767, "grad_norm": 811.5741577148438, "learning_rate": 4.911504087250735e-06, "loss": 95.9006, "step": 68690 }, { "epoch": 0.5683087231666459, "grad_norm": 606.1180419921875, "learning_rate": 4.910093944641569e-06, "loss": 75.9035, "step": 68700 }, { "epoch": 0.5683914464160152, "grad_norm": 671.3385620117188, "learning_rate": 4.9086838091858155e-06, "loss": 130.6404, "step": 68710 }, { "epoch": 0.5684741696653844, "grad_norm": 775.1796875, "learning_rate": 4.9072736809956735e-06, "loss": 73.8619, "step": 68720 }, { "epoch": 0.5685568929147536, "grad_norm": 940.5592651367188, "learning_rate": 4.9058635601833384e-06, "loss": 64.6535, "step": 68730 }, { "epoch": 0.568639616164123, "grad_norm": 385.8565673828125, "learning_rate": 4.904453446861008e-06, "loss": 84.9902, "step": 68740 }, { "epoch": 0.5687223394134922, "grad_norm": 1663.734375, "learning_rate": 4.903043341140879e-06, "loss": 100.3515, "step": 68750 }, { "epoch": 0.5688050626628613, "grad_norm": 715.17626953125, "learning_rate": 4.901633243135144e-06, "loss": 88.3827, "step": 68760 }, { "epoch": 0.5688877859122307, "grad_norm": 985.6260375976562, "learning_rate": 4.900223152956003e-06, "loss": 103.8463, "step": 68770 }, { "epoch": 0.5689705091615999, "grad_norm": 981.5747680664062, "learning_rate": 4.898813070715649e-06, "loss": 105.5598, "step": 68780 }, { "epoch": 0.5690532324109691, "grad_norm": 541.0383911132812, "learning_rate": 4.897402996526273e-06, "loss": 91.828, "step": 68790 }, { "epoch": 0.5691359556603384, "grad_norm": 378.74261474609375, "learning_rate": 4.895992930500068e-06, "loss": 94.0054, "step": 68800 }, { "epoch": 0.5692186789097076, "grad_norm": 524.0851440429688, "learning_rate": 4.894582872749229e-06, "loss": 80.3705, "step": 68810 }, { "epoch": 0.5693014021590768, "grad_norm": 640.1279907226562, "learning_rate": 4.893172823385947e-06, "loss": 74.0081, "step": 68820 }, { "epoch": 0.5693841254084461, "grad_norm": 1020.3170776367188, "learning_rate": 4.8917627825224146e-06, "loss": 96.6822, "step": 68830 }, { "epoch": 0.5694668486578153, "grad_norm": 511.52783203125, "learning_rate": 4.89035275027082e-06, "loss": 93.7879, "step": 68840 }, { "epoch": 0.5695495719071845, "grad_norm": 881.3643188476562, "learning_rate": 4.888942726743353e-06, "loss": 113.874, "step": 68850 }, { "epoch": 0.5696322951565538, "grad_norm": 638.2870483398438, "learning_rate": 4.887532712052206e-06, "loss": 95.4349, "step": 68860 }, { "epoch": 0.569715018405923, "grad_norm": 943.398681640625, "learning_rate": 4.886122706309563e-06, "loss": 90.6776, "step": 68870 }, { "epoch": 0.5697977416552922, "grad_norm": 827.296142578125, "learning_rate": 4.884712709627614e-06, "loss": 106.1661, "step": 68880 }, { "epoch": 0.5698804649046615, "grad_norm": 942.6498413085938, "learning_rate": 4.8833027221185455e-06, "loss": 101.3283, "step": 68890 }, { "epoch": 0.5699631881540307, "grad_norm": 805.362060546875, "learning_rate": 4.881892743894543e-06, "loss": 63.2547, "step": 68900 }, { "epoch": 0.5700459114033999, "grad_norm": 812.5333251953125, "learning_rate": 4.880482775067794e-06, "loss": 80.0272, "step": 68910 }, { "epoch": 0.5701286346527692, "grad_norm": 1515.912109375, "learning_rate": 4.879072815750481e-06, "loss": 102.9636, "step": 68920 }, { "epoch": 0.5702113579021384, "grad_norm": 705.251953125, "learning_rate": 4.87766286605479e-06, "loss": 78.7777, "step": 68930 }, { "epoch": 0.5702940811515076, "grad_norm": 1174.0625, "learning_rate": 4.876252926092903e-06, "loss": 75.163, "step": 68940 }, { "epoch": 0.5703768044008769, "grad_norm": 731.8579711914062, "learning_rate": 4.874842995977004e-06, "loss": 100.2167, "step": 68950 }, { "epoch": 0.5704595276502461, "grad_norm": 615.9132690429688, "learning_rate": 4.873433075819272e-06, "loss": 74.5286, "step": 68960 }, { "epoch": 0.5705422508996153, "grad_norm": 1427.2105712890625, "learning_rate": 4.87202316573189e-06, "loss": 115.191, "step": 68970 }, { "epoch": 0.5706249741489846, "grad_norm": 464.9676208496094, "learning_rate": 4.870613265827037e-06, "loss": 107.5646, "step": 68980 }, { "epoch": 0.5707076973983538, "grad_norm": 664.8630981445312, "learning_rate": 4.869203376216891e-06, "loss": 89.4376, "step": 68990 }, { "epoch": 0.570790420647723, "grad_norm": 952.9496459960938, "learning_rate": 4.867793497013634e-06, "loss": 87.7081, "step": 69000 }, { "epoch": 0.5708731438970923, "grad_norm": 864.7783203125, "learning_rate": 4.866383628329442e-06, "loss": 103.678, "step": 69010 }, { "epoch": 0.5709558671464615, "grad_norm": 616.1771850585938, "learning_rate": 4.86497377027649e-06, "loss": 74.9962, "step": 69020 }, { "epoch": 0.5710385903958307, "grad_norm": 1259.6622314453125, "learning_rate": 4.863563922966957e-06, "loss": 98.8335, "step": 69030 }, { "epoch": 0.5711213136452, "grad_norm": 562.259765625, "learning_rate": 4.862154086513016e-06, "loss": 79.6309, "step": 69040 }, { "epoch": 0.5712040368945692, "grad_norm": 941.865966796875, "learning_rate": 4.860744261026841e-06, "loss": 91.4525, "step": 69050 }, { "epoch": 0.5712867601439384, "grad_norm": 1558.767578125, "learning_rate": 4.8593344466206075e-06, "loss": 102.5667, "step": 69060 }, { "epoch": 0.5713694833933077, "grad_norm": 677.3783569335938, "learning_rate": 4.857924643406485e-06, "loss": 121.3402, "step": 69070 }, { "epoch": 0.5714522066426769, "grad_norm": 1102.9102783203125, "learning_rate": 4.856514851496647e-06, "loss": 89.2696, "step": 69080 }, { "epoch": 0.5715349298920461, "grad_norm": 510.9609375, "learning_rate": 4.8551050710032625e-06, "loss": 120.3483, "step": 69090 }, { "epoch": 0.5716176531414154, "grad_norm": 624.8123779296875, "learning_rate": 4.853695302038504e-06, "loss": 99.4614, "step": 69100 }, { "epoch": 0.5717003763907846, "grad_norm": 953.24267578125, "learning_rate": 4.8522855447145385e-06, "loss": 99.8863, "step": 69110 }, { "epoch": 0.5717830996401538, "grad_norm": 415.95648193359375, "learning_rate": 4.850875799143537e-06, "loss": 79.1049, "step": 69120 }, { "epoch": 0.5718658228895231, "grad_norm": 1333.311279296875, "learning_rate": 4.84946606543766e-06, "loss": 77.1295, "step": 69130 }, { "epoch": 0.5719485461388923, "grad_norm": 1068.9039306640625, "learning_rate": 4.848056343709079e-06, "loss": 63.2696, "step": 69140 }, { "epoch": 0.5720312693882615, "grad_norm": 2413.59814453125, "learning_rate": 4.846646634069957e-06, "loss": 98.2298, "step": 69150 }, { "epoch": 0.5721139926376309, "grad_norm": 919.4255981445312, "learning_rate": 4.845236936632458e-06, "loss": 105.757, "step": 69160 }, { "epoch": 0.572196715887, "grad_norm": 1202.3345947265625, "learning_rate": 4.843827251508747e-06, "loss": 101.2527, "step": 69170 }, { "epoch": 0.5722794391363693, "grad_norm": 699.7549438476562, "learning_rate": 4.842417578810984e-06, "loss": 76.5603, "step": 69180 }, { "epoch": 0.5723621623857386, "grad_norm": 579.6343383789062, "learning_rate": 4.841007918651329e-06, "loss": 106.7701, "step": 69190 }, { "epoch": 0.5724448856351078, "grad_norm": 625.5684204101562, "learning_rate": 4.839598271141947e-06, "loss": 91.3166, "step": 69200 }, { "epoch": 0.572527608884477, "grad_norm": 773.6903076171875, "learning_rate": 4.8381886363949956e-06, "loss": 75.625, "step": 69210 }, { "epoch": 0.5726103321338463, "grad_norm": 980.545166015625, "learning_rate": 4.83677901452263e-06, "loss": 93.6299, "step": 69220 }, { "epoch": 0.5726930553832155, "grad_norm": 606.6036987304688, "learning_rate": 4.835369405637009e-06, "loss": 102.3185, "step": 69230 }, { "epoch": 0.5727757786325847, "grad_norm": 922.7852783203125, "learning_rate": 4.833959809850288e-06, "loss": 89.2718, "step": 69240 }, { "epoch": 0.572858501881954, "grad_norm": 918.3201293945312, "learning_rate": 4.832550227274624e-06, "loss": 95.1504, "step": 69250 }, { "epoch": 0.5729412251313232, "grad_norm": 972.9816284179688, "learning_rate": 4.83114065802217e-06, "loss": 106.1438, "step": 69260 }, { "epoch": 0.5730239483806924, "grad_norm": 1341.721435546875, "learning_rate": 4.829731102205079e-06, "loss": 99.2695, "step": 69270 }, { "epoch": 0.5731066716300617, "grad_norm": 927.3786010742188, "learning_rate": 4.828321559935502e-06, "loss": 92.5324, "step": 69280 }, { "epoch": 0.5731893948794309, "grad_norm": 835.2514038085938, "learning_rate": 4.826912031325592e-06, "loss": 79.8504, "step": 69290 }, { "epoch": 0.5732721181288001, "grad_norm": 480.0574951171875, "learning_rate": 4.825502516487497e-06, "loss": 68.6633, "step": 69300 }, { "epoch": 0.5733548413781694, "grad_norm": 1109.6693115234375, "learning_rate": 4.824093015533365e-06, "loss": 91.0974, "step": 69310 }, { "epoch": 0.5734375646275386, "grad_norm": 1023.72314453125, "learning_rate": 4.822683528575344e-06, "loss": 81.7075, "step": 69320 }, { "epoch": 0.5735202878769078, "grad_norm": 689.5050048828125, "learning_rate": 4.8212740557255815e-06, "loss": 88.6784, "step": 69330 }, { "epoch": 0.573603011126277, "grad_norm": 864.6280517578125, "learning_rate": 4.819864597096222e-06, "loss": 98.9668, "step": 69340 }, { "epoch": 0.5736857343756463, "grad_norm": 502.53045654296875, "learning_rate": 4.81845515279941e-06, "loss": 80.9407, "step": 69350 }, { "epoch": 0.5737684576250155, "grad_norm": 1652.8223876953125, "learning_rate": 4.817045722947288e-06, "loss": 88.5993, "step": 69360 }, { "epoch": 0.5738511808743847, "grad_norm": 417.58795166015625, "learning_rate": 4.815636307651998e-06, "loss": 64.912, "step": 69370 }, { "epoch": 0.573933904123754, "grad_norm": 2181.54443359375, "learning_rate": 4.814226907025683e-06, "loss": 93.8224, "step": 69380 }, { "epoch": 0.5740166273731232, "grad_norm": 961.0506591796875, "learning_rate": 4.812817521180479e-06, "loss": 94.1438, "step": 69390 }, { "epoch": 0.5740993506224924, "grad_norm": 783.2133178710938, "learning_rate": 4.811408150228526e-06, "loss": 79.3694, "step": 69400 }, { "epoch": 0.5741820738718617, "grad_norm": 614.4639892578125, "learning_rate": 4.80999879428196e-06, "loss": 96.5799, "step": 69410 }, { "epoch": 0.5742647971212309, "grad_norm": 1084.011474609375, "learning_rate": 4.808589453452918e-06, "loss": 112.4365, "step": 69420 }, { "epoch": 0.5743475203706001, "grad_norm": 781.2046508789062, "learning_rate": 4.807180127853535e-06, "loss": 82.4409, "step": 69430 }, { "epoch": 0.5744302436199694, "grad_norm": 884.5288696289062, "learning_rate": 4.8057708175959446e-06, "loss": 113.9509, "step": 69440 }, { "epoch": 0.5745129668693386, "grad_norm": 927.2460327148438, "learning_rate": 4.804361522792278e-06, "loss": 82.8712, "step": 69450 }, { "epoch": 0.5745956901187078, "grad_norm": 778.8930053710938, "learning_rate": 4.8029522435546695e-06, "loss": 71.4773, "step": 69460 }, { "epoch": 0.5746784133680771, "grad_norm": 631.3436889648438, "learning_rate": 4.801542979995245e-06, "loss": 81.125, "step": 69470 }, { "epoch": 0.5747611366174463, "grad_norm": 812.0714111328125, "learning_rate": 4.800133732226135e-06, "loss": 82.7905, "step": 69480 }, { "epoch": 0.5748438598668155, "grad_norm": 738.4343872070312, "learning_rate": 4.798724500359467e-06, "loss": 109.1766, "step": 69490 }, { "epoch": 0.5749265831161848, "grad_norm": 1364.8973388671875, "learning_rate": 4.7973152845073666e-06, "loss": 74.5933, "step": 69500 }, { "epoch": 0.575009306365554, "grad_norm": 1043.5596923828125, "learning_rate": 4.795906084781958e-06, "loss": 78.0541, "step": 69510 }, { "epoch": 0.5750920296149232, "grad_norm": 1055.900146484375, "learning_rate": 4.7944969012953656e-06, "loss": 67.5411, "step": 69520 }, { "epoch": 0.5751747528642925, "grad_norm": 1076.0845947265625, "learning_rate": 4.793087734159711e-06, "loss": 84.8314, "step": 69530 }, { "epoch": 0.5752574761136617, "grad_norm": 731.822265625, "learning_rate": 4.791678583487118e-06, "loss": 116.2365, "step": 69540 }, { "epoch": 0.5753401993630309, "grad_norm": 490.7128601074219, "learning_rate": 4.790269449389703e-06, "loss": 113.4962, "step": 69550 }, { "epoch": 0.5754229226124002, "grad_norm": 4792.376953125, "learning_rate": 4.788860331979586e-06, "loss": 195.9267, "step": 69560 }, { "epoch": 0.5755056458617694, "grad_norm": 718.0413208007812, "learning_rate": 4.787451231368883e-06, "loss": 83.7023, "step": 69570 }, { "epoch": 0.5755883691111386, "grad_norm": 1221.1470947265625, "learning_rate": 4.786042147669709e-06, "loss": 99.3645, "step": 69580 }, { "epoch": 0.575671092360508, "grad_norm": 642.6139526367188, "learning_rate": 4.784633080994181e-06, "loss": 74.7277, "step": 69590 }, { "epoch": 0.5757538156098772, "grad_norm": 725.33349609375, "learning_rate": 4.783224031454409e-06, "loss": 108.2193, "step": 69600 }, { "epoch": 0.5758365388592463, "grad_norm": 512.4448852539062, "learning_rate": 4.781814999162507e-06, "loss": 100.0694, "step": 69610 }, { "epoch": 0.5759192621086157, "grad_norm": 540.7772216796875, "learning_rate": 4.780405984230582e-06, "loss": 76.5325, "step": 69620 }, { "epoch": 0.5760019853579849, "grad_norm": 1100.6224365234375, "learning_rate": 4.778996986770747e-06, "loss": 125.3361, "step": 69630 }, { "epoch": 0.5760847086073541, "grad_norm": 704.91748046875, "learning_rate": 4.777588006895109e-06, "loss": 84.2594, "step": 69640 }, { "epoch": 0.5761674318567234, "grad_norm": 976.7535400390625, "learning_rate": 4.77617904471577e-06, "loss": 86.525, "step": 69650 }, { "epoch": 0.5762501551060926, "grad_norm": 1758.328857421875, "learning_rate": 4.774770100344838e-06, "loss": 105.5501, "step": 69660 }, { "epoch": 0.5763328783554618, "grad_norm": 717.3983764648438, "learning_rate": 4.7733611738944155e-06, "loss": 78.1173, "step": 69670 }, { "epoch": 0.5764156016048311, "grad_norm": 1015.1489868164062, "learning_rate": 4.7719522654766044e-06, "loss": 93.4123, "step": 69680 }, { "epoch": 0.5764983248542003, "grad_norm": 2092.849365234375, "learning_rate": 4.7705433752035045e-06, "loss": 120.9727, "step": 69690 }, { "epoch": 0.5765810481035695, "grad_norm": 491.3244323730469, "learning_rate": 4.7691345031872156e-06, "loss": 70.0393, "step": 69700 }, { "epoch": 0.5766637713529388, "grad_norm": 954.6812133789062, "learning_rate": 4.767725649539833e-06, "loss": 87.8252, "step": 69710 }, { "epoch": 0.576746494602308, "grad_norm": 780.2823486328125, "learning_rate": 4.766316814373458e-06, "loss": 125.7246, "step": 69720 }, { "epoch": 0.5768292178516772, "grad_norm": 1444.184326171875, "learning_rate": 4.76490799780018e-06, "loss": 94.4206, "step": 69730 }, { "epoch": 0.5769119411010465, "grad_norm": 838.7236328125, "learning_rate": 4.763499199932093e-06, "loss": 126.1872, "step": 69740 }, { "epoch": 0.5769946643504157, "grad_norm": 1014.107421875, "learning_rate": 4.762090420881289e-06, "loss": 108.3225, "step": 69750 }, { "epoch": 0.5770773875997849, "grad_norm": 613.5991821289062, "learning_rate": 4.760681660759859e-06, "loss": 101.5193, "step": 69760 }, { "epoch": 0.5771601108491542, "grad_norm": 661.6652221679688, "learning_rate": 4.7592729196798905e-06, "loss": 78.5238, "step": 69770 }, { "epoch": 0.5772428340985234, "grad_norm": 917.331298828125, "learning_rate": 4.757864197753472e-06, "loss": 94.9886, "step": 69780 }, { "epoch": 0.5773255573478926, "grad_norm": 750.0388793945312, "learning_rate": 4.7564554950926876e-06, "loss": 68.4733, "step": 69790 }, { "epoch": 0.5774082805972619, "grad_norm": 754.461181640625, "learning_rate": 4.755046811809621e-06, "loss": 68.0218, "step": 69800 }, { "epoch": 0.5774910038466311, "grad_norm": 1071.869384765625, "learning_rate": 4.7536381480163575e-06, "loss": 147.5622, "step": 69810 }, { "epoch": 0.5775737270960003, "grad_norm": 809.9874877929688, "learning_rate": 4.752229503824974e-06, "loss": 68.0273, "step": 69820 }, { "epoch": 0.5776564503453696, "grad_norm": 1067.3671875, "learning_rate": 4.7508208793475515e-06, "loss": 77.8083, "step": 69830 }, { "epoch": 0.5777391735947388, "grad_norm": 757.71435546875, "learning_rate": 4.749412274696169e-06, "loss": 62.3041, "step": 69840 }, { "epoch": 0.577821896844108, "grad_norm": 619.0784912109375, "learning_rate": 4.748003689982901e-06, "loss": 79.2833, "step": 69850 }, { "epoch": 0.5779046200934773, "grad_norm": 631.8236083984375, "learning_rate": 4.746595125319823e-06, "loss": 99.2251, "step": 69860 }, { "epoch": 0.5779873433428465, "grad_norm": 1292.0946044921875, "learning_rate": 4.745186580819008e-06, "loss": 108.7097, "step": 69870 }, { "epoch": 0.5780700665922157, "grad_norm": 2003.7862548828125, "learning_rate": 4.743778056592528e-06, "loss": 101.937, "step": 69880 }, { "epoch": 0.578152789841585, "grad_norm": 978.4115600585938, "learning_rate": 4.742369552752453e-06, "loss": 106.5378, "step": 69890 }, { "epoch": 0.5782355130909542, "grad_norm": 603.2467041015625, "learning_rate": 4.740961069410848e-06, "loss": 93.0756, "step": 69900 }, { "epoch": 0.5783182363403234, "grad_norm": 951.097900390625, "learning_rate": 4.7395526066797835e-06, "loss": 85.2565, "step": 69910 }, { "epoch": 0.5784009595896927, "grad_norm": 813.0645751953125, "learning_rate": 4.738144164671322e-06, "loss": 123.9502, "step": 69920 }, { "epoch": 0.5784836828390619, "grad_norm": 1287.8382568359375, "learning_rate": 4.736735743497528e-06, "loss": 98.9144, "step": 69930 }, { "epoch": 0.5785664060884311, "grad_norm": 976.73974609375, "learning_rate": 4.735327343270461e-06, "loss": 102.1547, "step": 69940 }, { "epoch": 0.5786491293378004, "grad_norm": 874.163330078125, "learning_rate": 4.733918964102185e-06, "loss": 108.0046, "step": 69950 }, { "epoch": 0.5787318525871696, "grad_norm": 493.4253845214844, "learning_rate": 4.732510606104754e-06, "loss": 81.0991, "step": 69960 }, { "epoch": 0.5788145758365388, "grad_norm": 481.89520263671875, "learning_rate": 4.731102269390227e-06, "loss": 72.0266, "step": 69970 }, { "epoch": 0.5788972990859081, "grad_norm": 1048.3861083984375, "learning_rate": 4.729693954070661e-06, "loss": 101.3536, "step": 69980 }, { "epoch": 0.5789800223352773, "grad_norm": 514.9654541015625, "learning_rate": 4.728285660258104e-06, "loss": 84.173, "step": 69990 }, { "epoch": 0.5790627455846465, "grad_norm": 1212.3302001953125, "learning_rate": 4.726877388064609e-06, "loss": 95.1656, "step": 70000 }, { "epoch": 0.5791454688340159, "grad_norm": 594.7462768554688, "learning_rate": 4.725469137602229e-06, "loss": 77.2462, "step": 70010 }, { "epoch": 0.579228192083385, "grad_norm": 1222.4024658203125, "learning_rate": 4.724060908983008e-06, "loss": 77.3384, "step": 70020 }, { "epoch": 0.5793109153327543, "grad_norm": 953.1253051757812, "learning_rate": 4.7226527023189954e-06, "loss": 69.5169, "step": 70030 }, { "epoch": 0.5793936385821236, "grad_norm": 474.82373046875, "learning_rate": 4.721244517722233e-06, "loss": 62.3261, "step": 70040 }, { "epoch": 0.5794763618314928, "grad_norm": 958.0091552734375, "learning_rate": 4.719836355304766e-06, "loss": 114.5203, "step": 70050 }, { "epoch": 0.579559085080862, "grad_norm": 977.0399780273438, "learning_rate": 4.718428215178634e-06, "loss": 92.8079, "step": 70060 }, { "epoch": 0.5796418083302312, "grad_norm": 889.9998779296875, "learning_rate": 4.717020097455879e-06, "loss": 118.2158, "step": 70070 }, { "epoch": 0.5797245315796005, "grad_norm": 730.516357421875, "learning_rate": 4.715612002248533e-06, "loss": 90.555, "step": 70080 }, { "epoch": 0.5798072548289697, "grad_norm": 905.8795776367188, "learning_rate": 4.714203929668637e-06, "loss": 82.7224, "step": 70090 }, { "epoch": 0.5798899780783389, "grad_norm": 925.4937744140625, "learning_rate": 4.712795879828221e-06, "loss": 84.4116, "step": 70100 }, { "epoch": 0.5799727013277082, "grad_norm": 1022.7550659179688, "learning_rate": 4.71138785283932e-06, "loss": 79.103, "step": 70110 }, { "epoch": 0.5800554245770774, "grad_norm": 765.399169921875, "learning_rate": 4.709979848813963e-06, "loss": 88.3323, "step": 70120 }, { "epoch": 0.5801381478264466, "grad_norm": 806.2305297851562, "learning_rate": 4.7085718678641776e-06, "loss": 117.2993, "step": 70130 }, { "epoch": 0.5802208710758159, "grad_norm": 770.2034301757812, "learning_rate": 4.70716391010199e-06, "loss": 96.3582, "step": 70140 }, { "epoch": 0.5803035943251851, "grad_norm": 934.9890747070312, "learning_rate": 4.70575597563943e-06, "loss": 85.1314, "step": 70150 }, { "epoch": 0.5803863175745543, "grad_norm": 1144.4893798828125, "learning_rate": 4.704348064588514e-06, "loss": 95.0485, "step": 70160 }, { "epoch": 0.5804690408239236, "grad_norm": 441.04132080078125, "learning_rate": 4.702940177061266e-06, "loss": 59.0982, "step": 70170 }, { "epoch": 0.5805517640732928, "grad_norm": 883.7022705078125, "learning_rate": 4.7015323131697035e-06, "loss": 163.2424, "step": 70180 }, { "epoch": 0.580634487322662, "grad_norm": 1098.4498291015625, "learning_rate": 4.700124473025846e-06, "loss": 87.0862, "step": 70190 }, { "epoch": 0.5807172105720313, "grad_norm": 1009.8033447265625, "learning_rate": 4.6987166567417085e-06, "loss": 78.1907, "step": 70200 }, { "epoch": 0.5807999338214005, "grad_norm": 1105.3592529296875, "learning_rate": 4.697308864429303e-06, "loss": 98.8104, "step": 70210 }, { "epoch": 0.5808826570707697, "grad_norm": 786.5978393554688, "learning_rate": 4.695901096200643e-06, "loss": 79.2503, "step": 70220 }, { "epoch": 0.580965380320139, "grad_norm": 480.7000427246094, "learning_rate": 4.694493352167736e-06, "loss": 107.3822, "step": 70230 }, { "epoch": 0.5810481035695082, "grad_norm": 647.6439208984375, "learning_rate": 4.693085632442593e-06, "loss": 55.4946, "step": 70240 }, { "epoch": 0.5811308268188774, "grad_norm": 1218.6649169921875, "learning_rate": 4.691677937137217e-06, "loss": 116.4978, "step": 70250 }, { "epoch": 0.5812135500682467, "grad_norm": 511.2562561035156, "learning_rate": 4.690270266363612e-06, "loss": 120.063, "step": 70260 }, { "epoch": 0.5812962733176159, "grad_norm": 822.0852661132812, "learning_rate": 4.688862620233779e-06, "loss": 100.5438, "step": 70270 }, { "epoch": 0.5813789965669851, "grad_norm": 1118.2822265625, "learning_rate": 4.687454998859721e-06, "loss": 99.0379, "step": 70280 }, { "epoch": 0.5814617198163544, "grad_norm": 733.8569946289062, "learning_rate": 4.686047402353433e-06, "loss": 78.6712, "step": 70290 }, { "epoch": 0.5815444430657236, "grad_norm": 691.5249633789062, "learning_rate": 4.684639830826913e-06, "loss": 99.4106, "step": 70300 }, { "epoch": 0.5816271663150928, "grad_norm": 542.6314086914062, "learning_rate": 4.683232284392155e-06, "loss": 96.8427, "step": 70310 }, { "epoch": 0.5817098895644621, "grad_norm": 357.3701477050781, "learning_rate": 4.681824763161151e-06, "loss": 96.8162, "step": 70320 }, { "epoch": 0.5817926128138313, "grad_norm": 951.8953857421875, "learning_rate": 4.6804172672458905e-06, "loss": 111.6947, "step": 70330 }, { "epoch": 0.5818753360632005, "grad_norm": 743.7538452148438, "learning_rate": 4.67900979675836e-06, "loss": 97.8226, "step": 70340 }, { "epoch": 0.5819580593125698, "grad_norm": 659.83056640625, "learning_rate": 4.677602351810547e-06, "loss": 96.5504, "step": 70350 }, { "epoch": 0.582040782561939, "grad_norm": 643.2234497070312, "learning_rate": 4.676194932514435e-06, "loss": 70.3423, "step": 70360 }, { "epoch": 0.5821235058113082, "grad_norm": 760.5200805664062, "learning_rate": 4.674787538982006e-06, "loss": 100.9573, "step": 70370 }, { "epoch": 0.5822062290606775, "grad_norm": 504.2164306640625, "learning_rate": 4.6733801713252405e-06, "loss": 85.723, "step": 70380 }, { "epoch": 0.5822889523100467, "grad_norm": 458.2579650878906, "learning_rate": 4.671972829656116e-06, "loss": 67.0866, "step": 70390 }, { "epoch": 0.5823716755594159, "grad_norm": 1060.4288330078125, "learning_rate": 4.670565514086607e-06, "loss": 86.1434, "step": 70400 }, { "epoch": 0.5824543988087852, "grad_norm": 478.5907897949219, "learning_rate": 4.669158224728691e-06, "loss": 87.8763, "step": 70410 }, { "epoch": 0.5825371220581544, "grad_norm": 1053.1619873046875, "learning_rate": 4.667750961694334e-06, "loss": 95.2859, "step": 70420 }, { "epoch": 0.5826198453075236, "grad_norm": 752.842041015625, "learning_rate": 4.666343725095509e-06, "loss": 100.9934, "step": 70430 }, { "epoch": 0.582702568556893, "grad_norm": 687.784912109375, "learning_rate": 4.6649365150441825e-06, "loss": 76.4099, "step": 70440 }, { "epoch": 0.5827852918062622, "grad_norm": 836.1194458007812, "learning_rate": 4.66352933165232e-06, "loss": 91.4804, "step": 70450 }, { "epoch": 0.5828680150556313, "grad_norm": 739.9501953125, "learning_rate": 4.6621221750318835e-06, "loss": 71.6738, "step": 70460 }, { "epoch": 0.5829507383050007, "grad_norm": 392.7385559082031, "learning_rate": 4.660715045294834e-06, "loss": 110.5148, "step": 70470 }, { "epoch": 0.5830334615543699, "grad_norm": 821.631103515625, "learning_rate": 4.659307942553133e-06, "loss": 89.2418, "step": 70480 }, { "epoch": 0.5831161848037391, "grad_norm": 978.71142578125, "learning_rate": 4.657900866918735e-06, "loss": 94.3087, "step": 70490 }, { "epoch": 0.5831989080531084, "grad_norm": 1593.7996826171875, "learning_rate": 4.6564938185035954e-06, "loss": 103.8715, "step": 70500 }, { "epoch": 0.5832816313024776, "grad_norm": 825.0999145507812, "learning_rate": 4.655086797419666e-06, "loss": 99.5245, "step": 70510 }, { "epoch": 0.5833643545518468, "grad_norm": 541.9385375976562, "learning_rate": 4.653679803778897e-06, "loss": 87.7526, "step": 70520 }, { "epoch": 0.5834470778012161, "grad_norm": 1196.7366943359375, "learning_rate": 4.652272837693237e-06, "loss": 104.002, "step": 70530 }, { "epoch": 0.5835298010505853, "grad_norm": 718.0545043945312, "learning_rate": 4.650865899274632e-06, "loss": 125.1911, "step": 70540 }, { "epoch": 0.5836125242999545, "grad_norm": 612.04736328125, "learning_rate": 4.649458988635023e-06, "loss": 75.9752, "step": 70550 }, { "epoch": 0.5836952475493238, "grad_norm": 1365.9921875, "learning_rate": 4.6480521058863546e-06, "loss": 114.9041, "step": 70560 }, { "epoch": 0.583777970798693, "grad_norm": 1484.0291748046875, "learning_rate": 4.646645251140564e-06, "loss": 102.265, "step": 70570 }, { "epoch": 0.5838606940480622, "grad_norm": 756.23974609375, "learning_rate": 4.6452384245095924e-06, "loss": 82.9127, "step": 70580 }, { "epoch": 0.5839434172974315, "grad_norm": 841.9822387695312, "learning_rate": 4.643831626105369e-06, "loss": 107.5036, "step": 70590 }, { "epoch": 0.5840261405468007, "grad_norm": 947.1768798828125, "learning_rate": 4.642424856039827e-06, "loss": 89.3748, "step": 70600 }, { "epoch": 0.5841088637961699, "grad_norm": 939.9810791015625, "learning_rate": 4.6410181144249e-06, "loss": 113.3098, "step": 70610 }, { "epoch": 0.5841915870455392, "grad_norm": 819.6731567382812, "learning_rate": 4.639611401372514e-06, "loss": 75.1481, "step": 70620 }, { "epoch": 0.5842743102949084, "grad_norm": 1223.1297607421875, "learning_rate": 4.638204716994594e-06, "loss": 76.0569, "step": 70630 }, { "epoch": 0.5843570335442776, "grad_norm": 775.81689453125, "learning_rate": 4.636798061403065e-06, "loss": 79.0008, "step": 70640 }, { "epoch": 0.5844397567936469, "grad_norm": 796.52978515625, "learning_rate": 4.635391434709847e-06, "loss": 87.8786, "step": 70650 }, { "epoch": 0.5845224800430161, "grad_norm": 4887.2314453125, "learning_rate": 4.6339848370268585e-06, "loss": 128.6168, "step": 70660 }, { "epoch": 0.5846052032923853, "grad_norm": 644.0697021484375, "learning_rate": 4.63257826846602e-06, "loss": 83.8805, "step": 70670 }, { "epoch": 0.5846879265417546, "grad_norm": 866.1959838867188, "learning_rate": 4.6311717291392396e-06, "loss": 101.9503, "step": 70680 }, { "epoch": 0.5847706497911238, "grad_norm": 1018.76025390625, "learning_rate": 4.629765219158433e-06, "loss": 82.65, "step": 70690 }, { "epoch": 0.584853373040493, "grad_norm": 1179.1864013671875, "learning_rate": 4.628358738635507e-06, "loss": 134.8187, "step": 70700 }, { "epoch": 0.5849360962898623, "grad_norm": 656.47900390625, "learning_rate": 4.626952287682372e-06, "loss": 117.6582, "step": 70710 }, { "epoch": 0.5850188195392315, "grad_norm": 1300.9158935546875, "learning_rate": 4.6255458664109306e-06, "loss": 121.145, "step": 70720 }, { "epoch": 0.5851015427886007, "grad_norm": 1486.7906494140625, "learning_rate": 4.624139474933087e-06, "loss": 94.2786, "step": 70730 }, { "epoch": 0.58518426603797, "grad_norm": 759.8827514648438, "learning_rate": 4.62273311336074e-06, "loss": 88.7782, "step": 70740 }, { "epoch": 0.5852669892873392, "grad_norm": 746.8486938476562, "learning_rate": 4.62132678180579e-06, "loss": 78.3264, "step": 70750 }, { "epoch": 0.5853497125367084, "grad_norm": 837.21337890625, "learning_rate": 4.619920480380127e-06, "loss": 119.9007, "step": 70760 }, { "epoch": 0.5854324357860777, "grad_norm": 10413.9169921875, "learning_rate": 4.618514209195648e-06, "loss": 153.6559, "step": 70770 }, { "epoch": 0.5855151590354469, "grad_norm": 851.4038696289062, "learning_rate": 4.617107968364243e-06, "loss": 90.0972, "step": 70780 }, { "epoch": 0.5855978822848161, "grad_norm": 551.212646484375, "learning_rate": 4.615701757997799e-06, "loss": 134.4456, "step": 70790 }, { "epoch": 0.5856806055341853, "grad_norm": 1029.0963134765625, "learning_rate": 4.614295578208202e-06, "loss": 74.9261, "step": 70800 }, { "epoch": 0.5857633287835546, "grad_norm": 1182.5975341796875, "learning_rate": 4.612889429107337e-06, "loss": 94.9276, "step": 70810 }, { "epoch": 0.5858460520329238, "grad_norm": 879.9530639648438, "learning_rate": 4.611483310807082e-06, "loss": 109.8622, "step": 70820 }, { "epoch": 0.585928775282293, "grad_norm": 756.7501831054688, "learning_rate": 4.610077223419319e-06, "loss": 111.9088, "step": 70830 }, { "epoch": 0.5860114985316623, "grad_norm": 935.3068237304688, "learning_rate": 4.608671167055922e-06, "loss": 114.9015, "step": 70840 }, { "epoch": 0.5860942217810315, "grad_norm": 764.818359375, "learning_rate": 4.607265141828762e-06, "loss": 95.3971, "step": 70850 }, { "epoch": 0.5861769450304007, "grad_norm": 516.7097778320312, "learning_rate": 4.605859147849713e-06, "loss": 66.5408, "step": 70860 }, { "epoch": 0.58625966827977, "grad_norm": 1200.6766357421875, "learning_rate": 4.604453185230643e-06, "loss": 94.5176, "step": 70870 }, { "epoch": 0.5863423915291393, "grad_norm": 1096.214599609375, "learning_rate": 4.603047254083418e-06, "loss": 123.1105, "step": 70880 }, { "epoch": 0.5864251147785084, "grad_norm": 789.3768920898438, "learning_rate": 4.601641354519901e-06, "loss": 97.1097, "step": 70890 }, { "epoch": 0.5865078380278778, "grad_norm": 483.42315673828125, "learning_rate": 4.6002354866519526e-06, "loss": 100.7335, "step": 70900 }, { "epoch": 0.586590561277247, "grad_norm": 978.4453125, "learning_rate": 4.598829650591432e-06, "loss": 91.333, "step": 70910 }, { "epoch": 0.5866732845266162, "grad_norm": 653.1732177734375, "learning_rate": 4.597423846450196e-06, "loss": 86.567, "step": 70920 }, { "epoch": 0.5867560077759855, "grad_norm": 714.1948852539062, "learning_rate": 4.596018074340097e-06, "loss": 86.6666, "step": 70930 }, { "epoch": 0.5868387310253547, "grad_norm": 635.244140625, "learning_rate": 4.594612334372985e-06, "loss": 81.3609, "step": 70940 }, { "epoch": 0.5869214542747239, "grad_norm": 790.4684448242188, "learning_rate": 4.59320662666071e-06, "loss": 91.6696, "step": 70950 }, { "epoch": 0.5870041775240932, "grad_norm": 636.0848388671875, "learning_rate": 4.591800951315116e-06, "loss": 78.6052, "step": 70960 }, { "epoch": 0.5870869007734624, "grad_norm": 1063.884765625, "learning_rate": 4.590395308448046e-06, "loss": 88.4161, "step": 70970 }, { "epoch": 0.5871696240228316, "grad_norm": 1092.5953369140625, "learning_rate": 4.588989698171343e-06, "loss": 89.8243, "step": 70980 }, { "epoch": 0.5872523472722009, "grad_norm": 758.7847290039062, "learning_rate": 4.587584120596842e-06, "loss": 95.1261, "step": 70990 }, { "epoch": 0.5873350705215701, "grad_norm": 634.198486328125, "learning_rate": 4.58617857583638e-06, "loss": 72.3683, "step": 71000 }, { "epoch": 0.5874177937709393, "grad_norm": 774.2512817382812, "learning_rate": 4.5847730640017926e-06, "loss": 86.8859, "step": 71010 }, { "epoch": 0.5875005170203086, "grad_norm": 682.7206420898438, "learning_rate": 4.5833675852049045e-06, "loss": 95.423, "step": 71020 }, { "epoch": 0.5875832402696778, "grad_norm": 458.109130859375, "learning_rate": 4.5819621395575445e-06, "loss": 90.4719, "step": 71030 }, { "epoch": 0.587665963519047, "grad_norm": 784.8795166015625, "learning_rate": 4.5805567271715395e-06, "loss": 93.2228, "step": 71040 }, { "epoch": 0.5877486867684163, "grad_norm": 802.4894409179688, "learning_rate": 4.5791513481587105e-06, "loss": 68.1955, "step": 71050 }, { "epoch": 0.5878314100177855, "grad_norm": 834.863525390625, "learning_rate": 4.577746002630878e-06, "loss": 81.5406, "step": 71060 }, { "epoch": 0.5879141332671547, "grad_norm": 1166.91796875, "learning_rate": 4.576340690699857e-06, "loss": 88.6559, "step": 71070 }, { "epoch": 0.587996856516524, "grad_norm": 426.1348876953125, "learning_rate": 4.574935412477464e-06, "loss": 92.7223, "step": 71080 }, { "epoch": 0.5880795797658932, "grad_norm": 980.8372192382812, "learning_rate": 4.573530168075508e-06, "loss": 126.4129, "step": 71090 }, { "epoch": 0.5881623030152624, "grad_norm": 789.66796875, "learning_rate": 4.572124957605803e-06, "loss": 80.9297, "step": 71100 }, { "epoch": 0.5882450262646317, "grad_norm": 647.4459228515625, "learning_rate": 4.5707197811801484e-06, "loss": 68.6454, "step": 71110 }, { "epoch": 0.5883277495140009, "grad_norm": 1236.953369140625, "learning_rate": 4.569314638910352e-06, "loss": 79.3519, "step": 71120 }, { "epoch": 0.5884104727633701, "grad_norm": 808.24462890625, "learning_rate": 4.56790953090821e-06, "loss": 126.6436, "step": 71130 }, { "epoch": 0.5884931960127394, "grad_norm": 1413.169921875, "learning_rate": 4.566504457285527e-06, "loss": 102.8847, "step": 71140 }, { "epoch": 0.5885759192621086, "grad_norm": 751.8162231445312, "learning_rate": 4.565099418154093e-06, "loss": 84.422, "step": 71150 }, { "epoch": 0.5886586425114778, "grad_norm": 608.4118041992188, "learning_rate": 4.563694413625703e-06, "loss": 66.0182, "step": 71160 }, { "epoch": 0.5887413657608471, "grad_norm": 413.452392578125, "learning_rate": 4.5622894438121465e-06, "loss": 75.759, "step": 71170 }, { "epoch": 0.5888240890102163, "grad_norm": 522.5778198242188, "learning_rate": 4.560884508825212e-06, "loss": 99.5577, "step": 71180 }, { "epoch": 0.5889068122595855, "grad_norm": 492.1758728027344, "learning_rate": 4.559479608776679e-06, "loss": 91.1286, "step": 71190 }, { "epoch": 0.5889895355089548, "grad_norm": 694.331787109375, "learning_rate": 4.558074743778333e-06, "loss": 96.5787, "step": 71200 }, { "epoch": 0.589072258758324, "grad_norm": 828.5624389648438, "learning_rate": 4.556669913941951e-06, "loss": 85.6967, "step": 71210 }, { "epoch": 0.5891549820076932, "grad_norm": 963.8417358398438, "learning_rate": 4.555265119379308e-06, "loss": 84.3966, "step": 71220 }, { "epoch": 0.5892377052570625, "grad_norm": 869.2294921875, "learning_rate": 4.55386036020218e-06, "loss": 124.0421, "step": 71230 }, { "epoch": 0.5893204285064317, "grad_norm": 1175.8673095703125, "learning_rate": 4.552455636522335e-06, "loss": 127.9119, "step": 71240 }, { "epoch": 0.5894031517558009, "grad_norm": 815.5340576171875, "learning_rate": 4.551050948451542e-06, "loss": 76.2241, "step": 71250 }, { "epoch": 0.5894858750051702, "grad_norm": 769.5640869140625, "learning_rate": 4.549646296101564e-06, "loss": 106.5304, "step": 71260 }, { "epoch": 0.5895685982545394, "grad_norm": 693.4418334960938, "learning_rate": 4.548241679584165e-06, "loss": 79.3208, "step": 71270 }, { "epoch": 0.5896513215039086, "grad_norm": 1170.5986328125, "learning_rate": 4.546837099011101e-06, "loss": 126.9079, "step": 71280 }, { "epoch": 0.589734044753278, "grad_norm": 750.7109375, "learning_rate": 4.545432554494128e-06, "loss": 109.4266, "step": 71290 }, { "epoch": 0.5898167680026472, "grad_norm": 1614.88671875, "learning_rate": 4.544028046145002e-06, "loss": 115.2326, "step": 71300 }, { "epoch": 0.5898994912520164, "grad_norm": 1244.8895263671875, "learning_rate": 4.542623574075471e-06, "loss": 115.9653, "step": 71310 }, { "epoch": 0.5899822145013857, "grad_norm": 1223.9134521484375, "learning_rate": 4.541219138397283e-06, "loss": 107.9536, "step": 71320 }, { "epoch": 0.5900649377507549, "grad_norm": 645.8054809570312, "learning_rate": 4.539814739222182e-06, "loss": 88.0276, "step": 71330 }, { "epoch": 0.5901476610001241, "grad_norm": 906.608154296875, "learning_rate": 4.538410376661912e-06, "loss": 97.0788, "step": 71340 }, { "epoch": 0.5902303842494934, "grad_norm": 655.9107666015625, "learning_rate": 4.537006050828209e-06, "loss": 102.7088, "step": 71350 }, { "epoch": 0.5903131074988626, "grad_norm": 881.0200805664062, "learning_rate": 4.535601761832811e-06, "loss": 94.3685, "step": 71360 }, { "epoch": 0.5903958307482318, "grad_norm": 593.4007568359375, "learning_rate": 4.534197509787448e-06, "loss": 96.9625, "step": 71370 }, { "epoch": 0.5904785539976011, "grad_norm": 362.5093688964844, "learning_rate": 4.5327932948038525e-06, "loss": 76.0315, "step": 71380 }, { "epoch": 0.5905612772469703, "grad_norm": 724.7342529296875, "learning_rate": 4.5313891169937495e-06, "loss": 68.4613, "step": 71390 }, { "epoch": 0.5906440004963395, "grad_norm": 320.0503845214844, "learning_rate": 4.529984976468864e-06, "loss": 59.5351, "step": 71400 }, { "epoch": 0.5907267237457088, "grad_norm": 689.7297973632812, "learning_rate": 4.528580873340916e-06, "loss": 71.2971, "step": 71410 }, { "epoch": 0.590809446995078, "grad_norm": 698.4686889648438, "learning_rate": 4.5271768077216245e-06, "loss": 81.2217, "step": 71420 }, { "epoch": 0.5908921702444472, "grad_norm": 778.4169311523438, "learning_rate": 4.525772779722705e-06, "loss": 78.8058, "step": 71430 }, { "epoch": 0.5909748934938165, "grad_norm": 723.3839721679688, "learning_rate": 4.524368789455872e-06, "loss": 140.3945, "step": 71440 }, { "epoch": 0.5910576167431857, "grad_norm": 1784.62158203125, "learning_rate": 4.5229648370328276e-06, "loss": 99.9209, "step": 71450 }, { "epoch": 0.5911403399925549, "grad_norm": 688.6642456054688, "learning_rate": 4.521560922565282e-06, "loss": 110.5966, "step": 71460 }, { "epoch": 0.5912230632419242, "grad_norm": 1047.2376708984375, "learning_rate": 4.52015704616494e-06, "loss": 67.9934, "step": 71470 }, { "epoch": 0.5913057864912934, "grad_norm": 956.0989379882812, "learning_rate": 4.518753207943498e-06, "loss": 77.9636, "step": 71480 }, { "epoch": 0.5913885097406626, "grad_norm": 832.9586181640625, "learning_rate": 4.517349408012656e-06, "loss": 89.3105, "step": 71490 }, { "epoch": 0.5914712329900319, "grad_norm": 1294.6611328125, "learning_rate": 4.515945646484105e-06, "loss": 108.4286, "step": 71500 }, { "epoch": 0.5915539562394011, "grad_norm": 635.0994262695312, "learning_rate": 4.514541923469538e-06, "loss": 74.8464, "step": 71510 }, { "epoch": 0.5916366794887703, "grad_norm": 1128.41748046875, "learning_rate": 4.513138239080641e-06, "loss": 65.9964, "step": 71520 }, { "epoch": 0.5917194027381395, "grad_norm": 722.1233520507812, "learning_rate": 4.511734593429104e-06, "loss": 106.5479, "step": 71530 }, { "epoch": 0.5918021259875088, "grad_norm": 948.386474609375, "learning_rate": 4.510330986626602e-06, "loss": 87.1451, "step": 71540 }, { "epoch": 0.591884849236878, "grad_norm": 1609.5699462890625, "learning_rate": 4.5089274187848144e-06, "loss": 74.7854, "step": 71550 }, { "epoch": 0.5919675724862472, "grad_norm": 1418.9605712890625, "learning_rate": 4.507523890015421e-06, "loss": 88.9027, "step": 71560 }, { "epoch": 0.5920502957356165, "grad_norm": 553.1968383789062, "learning_rate": 4.5061204004300905e-06, "loss": 79.0763, "step": 71570 }, { "epoch": 0.5921330189849857, "grad_norm": 1095.764892578125, "learning_rate": 4.504716950140492e-06, "loss": 103.5799, "step": 71580 }, { "epoch": 0.5922157422343549, "grad_norm": 527.2064819335938, "learning_rate": 4.503313539258294e-06, "loss": 88.4465, "step": 71590 }, { "epoch": 0.5922984654837242, "grad_norm": 1063.0721435546875, "learning_rate": 4.501910167895158e-06, "loss": 98.2458, "step": 71600 }, { "epoch": 0.5923811887330934, "grad_norm": 2767.47705078125, "learning_rate": 4.500506836162746e-06, "loss": 92.3343, "step": 71610 }, { "epoch": 0.5924639119824626, "grad_norm": 455.406982421875, "learning_rate": 4.499103544172711e-06, "loss": 81.4368, "step": 71620 }, { "epoch": 0.5925466352318319, "grad_norm": 850.3991088867188, "learning_rate": 4.497700292036708e-06, "loss": 70.8303, "step": 71630 }, { "epoch": 0.5926293584812011, "grad_norm": 778.4962768554688, "learning_rate": 4.4962970798663865e-06, "loss": 66.9589, "step": 71640 }, { "epoch": 0.5927120817305703, "grad_norm": 543.8009033203125, "learning_rate": 4.494893907773394e-06, "loss": 69.5775, "step": 71650 }, { "epoch": 0.5927948049799396, "grad_norm": 799.34375, "learning_rate": 4.493490775869377e-06, "loss": 78.267, "step": 71660 }, { "epoch": 0.5928775282293088, "grad_norm": 1260.3624267578125, "learning_rate": 4.492087684265975e-06, "loss": 91.3813, "step": 71670 }, { "epoch": 0.592960251478678, "grad_norm": 714.5103149414062, "learning_rate": 4.490684633074824e-06, "loss": 87.9972, "step": 71680 }, { "epoch": 0.5930429747280473, "grad_norm": 419.715087890625, "learning_rate": 4.489281622407559e-06, "loss": 85.3557, "step": 71690 }, { "epoch": 0.5931256979774165, "grad_norm": 939.987060546875, "learning_rate": 4.487878652375813e-06, "loss": 132.4992, "step": 71700 }, { "epoch": 0.5932084212267857, "grad_norm": 623.05712890625, "learning_rate": 4.486475723091211e-06, "loss": 97.7154, "step": 71710 }, { "epoch": 0.593291144476155, "grad_norm": 852.7836303710938, "learning_rate": 4.485072834665379e-06, "loss": 133.3668, "step": 71720 }, { "epoch": 0.5933738677255243, "grad_norm": 830.4113159179688, "learning_rate": 4.483669987209938e-06, "loss": 85.3274, "step": 71730 }, { "epoch": 0.5934565909748934, "grad_norm": 1263.1221923828125, "learning_rate": 4.482267180836508e-06, "loss": 96.8079, "step": 71740 }, { "epoch": 0.5935393142242628, "grad_norm": 605.0487060546875, "learning_rate": 4.4808644156567e-06, "loss": 86.8117, "step": 71750 }, { "epoch": 0.593622037473632, "grad_norm": 524.0765380859375, "learning_rate": 4.479461691782129e-06, "loss": 74.157, "step": 71760 }, { "epoch": 0.5937047607230012, "grad_norm": 708.2615966796875, "learning_rate": 4.478059009324403e-06, "loss": 106.5555, "step": 71770 }, { "epoch": 0.5937874839723705, "grad_norm": 427.41162109375, "learning_rate": 4.476656368395126e-06, "loss": 91.2204, "step": 71780 }, { "epoch": 0.5938702072217397, "grad_norm": 744.795654296875, "learning_rate": 4.4752537691059e-06, "loss": 78.5461, "step": 71790 }, { "epoch": 0.5939529304711089, "grad_norm": 1040.6856689453125, "learning_rate": 4.473851211568323e-06, "loss": 105.2389, "step": 71800 }, { "epoch": 0.5940356537204782, "grad_norm": 558.6827392578125, "learning_rate": 4.472448695893991e-06, "loss": 97.6422, "step": 71810 }, { "epoch": 0.5941183769698474, "grad_norm": 1102.7012939453125, "learning_rate": 4.471046222194494e-06, "loss": 104.5667, "step": 71820 }, { "epoch": 0.5942011002192166, "grad_norm": 1997.569091796875, "learning_rate": 4.469643790581422e-06, "loss": 84.4513, "step": 71830 }, { "epoch": 0.5942838234685859, "grad_norm": 1167.2252197265625, "learning_rate": 4.468241401166359e-06, "loss": 102.4033, "step": 71840 }, { "epoch": 0.5943665467179551, "grad_norm": 1216.229736328125, "learning_rate": 4.466839054060888e-06, "loss": 73.6999, "step": 71850 }, { "epoch": 0.5944492699673243, "grad_norm": 371.2035217285156, "learning_rate": 4.465436749376586e-06, "loss": 87.3126, "step": 71860 }, { "epoch": 0.5945319932166936, "grad_norm": 993.4617309570312, "learning_rate": 4.464034487225031e-06, "loss": 84.5187, "step": 71870 }, { "epoch": 0.5946147164660628, "grad_norm": 668.97802734375, "learning_rate": 4.462632267717789e-06, "loss": 93.8088, "step": 71880 }, { "epoch": 0.594697439715432, "grad_norm": 723.9701538085938, "learning_rate": 4.461230090966433e-06, "loss": 92.2385, "step": 71890 }, { "epoch": 0.5947801629648013, "grad_norm": 1200.336669921875, "learning_rate": 4.4598279570825244e-06, "loss": 108.3329, "step": 71900 }, { "epoch": 0.5948628862141705, "grad_norm": 749.9968872070312, "learning_rate": 4.458425866177628e-06, "loss": 96.5593, "step": 71910 }, { "epoch": 0.5949456094635397, "grad_norm": 984.7999877929688, "learning_rate": 4.457023818363299e-06, "loss": 105.6873, "step": 71920 }, { "epoch": 0.595028332712909, "grad_norm": 679.338134765625, "learning_rate": 4.455621813751093e-06, "loss": 73.7637, "step": 71930 }, { "epoch": 0.5951110559622782, "grad_norm": 1122.4134521484375, "learning_rate": 4.45421985245256e-06, "loss": 98.8108, "step": 71940 }, { "epoch": 0.5951937792116474, "grad_norm": 988.9681396484375, "learning_rate": 4.452817934579249e-06, "loss": 84.0397, "step": 71950 }, { "epoch": 0.5952765024610167, "grad_norm": 918.6084594726562, "learning_rate": 4.451416060242707e-06, "loss": 103.6519, "step": 71960 }, { "epoch": 0.5953592257103859, "grad_norm": 676.3186645507812, "learning_rate": 4.450014229554468e-06, "loss": 80.6245, "step": 71970 }, { "epoch": 0.5954419489597551, "grad_norm": 987.3781127929688, "learning_rate": 4.448612442626073e-06, "loss": 89.4239, "step": 71980 }, { "epoch": 0.5955246722091244, "grad_norm": 1492.4959716796875, "learning_rate": 4.447210699569055e-06, "loss": 85.7063, "step": 71990 }, { "epoch": 0.5956073954584936, "grad_norm": 614.7178955078125, "learning_rate": 4.445809000494945e-06, "loss": 101.3085, "step": 72000 }, { "epoch": 0.5956901187078628, "grad_norm": 1345.1634521484375, "learning_rate": 4.4444073455152705e-06, "loss": 87.2096, "step": 72010 }, { "epoch": 0.5957728419572321, "grad_norm": 536.4259033203125, "learning_rate": 4.443005734741553e-06, "loss": 71.8604, "step": 72020 }, { "epoch": 0.5958555652066013, "grad_norm": 652.9493408203125, "learning_rate": 4.441604168285313e-06, "loss": 121.6692, "step": 72030 }, { "epoch": 0.5959382884559705, "grad_norm": 1071.4775390625, "learning_rate": 4.440202646258067e-06, "loss": 125.7619, "step": 72040 }, { "epoch": 0.5960210117053398, "grad_norm": 540.5606689453125, "learning_rate": 4.4388011687713274e-06, "loss": 76.9964, "step": 72050 }, { "epoch": 0.596103734954709, "grad_norm": 979.0853881835938, "learning_rate": 4.437399735936603e-06, "loss": 86.8726, "step": 72060 }, { "epoch": 0.5961864582040782, "grad_norm": 1528.9964599609375, "learning_rate": 4.435998347865399e-06, "loss": 139.583, "step": 72070 }, { "epoch": 0.5962691814534475, "grad_norm": 581.6967163085938, "learning_rate": 4.4345970046692174e-06, "loss": 79.9947, "step": 72080 }, { "epoch": 0.5963519047028167, "grad_norm": 915.5377807617188, "learning_rate": 4.433195706459558e-06, "loss": 67.4258, "step": 72090 }, { "epoch": 0.5964346279521859, "grad_norm": 1992.865234375, "learning_rate": 4.431794453347915e-06, "loss": 97.0232, "step": 72100 }, { "epoch": 0.5965173512015552, "grad_norm": 653.1949462890625, "learning_rate": 4.430393245445781e-06, "loss": 99.4039, "step": 72110 }, { "epoch": 0.5966000744509244, "grad_norm": 1475.4862060546875, "learning_rate": 4.42899208286464e-06, "loss": 134.1732, "step": 72120 }, { "epoch": 0.5966827977002936, "grad_norm": 985.6160888671875, "learning_rate": 4.427590965715981e-06, "loss": 93.6811, "step": 72130 }, { "epoch": 0.596765520949663, "grad_norm": 1121.754638671875, "learning_rate": 4.426189894111281e-06, "loss": 98.63, "step": 72140 }, { "epoch": 0.5968482441990322, "grad_norm": 778.4381103515625, "learning_rate": 4.4247888681620165e-06, "loss": 95.378, "step": 72150 }, { "epoch": 0.5969309674484014, "grad_norm": 1081.6031494140625, "learning_rate": 4.423387887979663e-06, "loss": 76.984, "step": 72160 }, { "epoch": 0.5970136906977707, "grad_norm": 844.283203125, "learning_rate": 4.421986953675687e-06, "loss": 95.1035, "step": 72170 }, { "epoch": 0.5970964139471399, "grad_norm": 1140.15869140625, "learning_rate": 4.420586065361558e-06, "loss": 98.3029, "step": 72180 }, { "epoch": 0.5971791371965091, "grad_norm": 1047.5072021484375, "learning_rate": 4.419185223148737e-06, "loss": 71.558, "step": 72190 }, { "epoch": 0.5972618604458784, "grad_norm": 512.5674438476562, "learning_rate": 4.417784427148681e-06, "loss": 118.2006, "step": 72200 }, { "epoch": 0.5973445836952476, "grad_norm": 922.7410888671875, "learning_rate": 4.4163836774728466e-06, "loss": 88.5922, "step": 72210 }, { "epoch": 0.5974273069446168, "grad_norm": 695.3885498046875, "learning_rate": 4.414982974232686e-06, "loss": 85.0964, "step": 72220 }, { "epoch": 0.5975100301939861, "grad_norm": 756.3115844726562, "learning_rate": 4.413582317539644e-06, "loss": 84.8309, "step": 72230 }, { "epoch": 0.5975927534433553, "grad_norm": 893.6836547851562, "learning_rate": 4.412181707505167e-06, "loss": 76.9076, "step": 72240 }, { "epoch": 0.5976754766927245, "grad_norm": 830.41943359375, "learning_rate": 4.410781144240692e-06, "loss": 99.8375, "step": 72250 }, { "epoch": 0.5977581999420937, "grad_norm": 520.194580078125, "learning_rate": 4.409380627857658e-06, "loss": 77.2131, "step": 72260 }, { "epoch": 0.597840923191463, "grad_norm": 1167.1990966796875, "learning_rate": 4.4079801584674955e-06, "loss": 106.8954, "step": 72270 }, { "epoch": 0.5979236464408322, "grad_norm": 987.2964477539062, "learning_rate": 4.406579736181636e-06, "loss": 109.5783, "step": 72280 }, { "epoch": 0.5980063696902014, "grad_norm": 755.127197265625, "learning_rate": 4.405179361111503e-06, "loss": 102.5628, "step": 72290 }, { "epoch": 0.5980890929395707, "grad_norm": 858.6559448242188, "learning_rate": 4.403779033368521e-06, "loss": 83.2092, "step": 72300 }, { "epoch": 0.5981718161889399, "grad_norm": 1175.3221435546875, "learning_rate": 4.402378753064102e-06, "loss": 120.481, "step": 72310 }, { "epoch": 0.5982545394383091, "grad_norm": 1265.4703369140625, "learning_rate": 4.400978520309663e-06, "loss": 126.6442, "step": 72320 }, { "epoch": 0.5983372626876784, "grad_norm": 837.8228759765625, "learning_rate": 4.399578335216615e-06, "loss": 83.0502, "step": 72330 }, { "epoch": 0.5984199859370476, "grad_norm": 454.5643310546875, "learning_rate": 4.3981781978963625e-06, "loss": 195.8178, "step": 72340 }, { "epoch": 0.5985027091864168, "grad_norm": 1586.3955078125, "learning_rate": 4.39677810846031e-06, "loss": 104.6424, "step": 72350 }, { "epoch": 0.5985854324357861, "grad_norm": 472.3536376953125, "learning_rate": 4.395378067019854e-06, "loss": 103.8725, "step": 72360 }, { "epoch": 0.5986681556851553, "grad_norm": 1100.48486328125, "learning_rate": 4.39397807368639e-06, "loss": 95.1332, "step": 72370 }, { "epoch": 0.5987508789345245, "grad_norm": 836.9981079101562, "learning_rate": 4.39257812857131e-06, "loss": 88.9254, "step": 72380 }, { "epoch": 0.5988336021838938, "grad_norm": 861.1360473632812, "learning_rate": 4.391178231786003e-06, "loss": 154.0604, "step": 72390 }, { "epoch": 0.598916325433263, "grad_norm": 801.0167846679688, "learning_rate": 4.389778383441847e-06, "loss": 57.2983, "step": 72400 }, { "epoch": 0.5989990486826322, "grad_norm": 737.025390625, "learning_rate": 4.388378583650225e-06, "loss": 106.2906, "step": 72410 }, { "epoch": 0.5990817719320015, "grad_norm": 770.6280517578125, "learning_rate": 4.386978832522512e-06, "loss": 60.7984, "step": 72420 }, { "epoch": 0.5991644951813707, "grad_norm": 454.4928894042969, "learning_rate": 4.38557913017008e-06, "loss": 85.5769, "step": 72430 }, { "epoch": 0.5992472184307399, "grad_norm": 799.1620483398438, "learning_rate": 4.384179476704297e-06, "loss": 69.6634, "step": 72440 }, { "epoch": 0.5993299416801092, "grad_norm": 1317.85693359375, "learning_rate": 4.382779872236527e-06, "loss": 96.343, "step": 72450 }, { "epoch": 0.5994126649294784, "grad_norm": 550.043212890625, "learning_rate": 4.3813803168781295e-06, "loss": 66.2317, "step": 72460 }, { "epoch": 0.5994953881788476, "grad_norm": 359.5018005371094, "learning_rate": 4.379980810740463e-06, "loss": 92.4579, "step": 72470 }, { "epoch": 0.5995781114282169, "grad_norm": 773.148193359375, "learning_rate": 4.378581353934876e-06, "loss": 109.3504, "step": 72480 }, { "epoch": 0.5996608346775861, "grad_norm": 994.1666259765625, "learning_rate": 4.3771819465727185e-06, "loss": 106.4414, "step": 72490 }, { "epoch": 0.5997435579269553, "grad_norm": 1295.1717529296875, "learning_rate": 4.3757825887653345e-06, "loss": 86.4112, "step": 72500 }, { "epoch": 0.5998262811763246, "grad_norm": 1010.71142578125, "learning_rate": 4.374383280624066e-06, "loss": 85.4114, "step": 72510 }, { "epoch": 0.5999090044256938, "grad_norm": 731.9288940429688, "learning_rate": 4.372984022260249e-06, "loss": 104.8326, "step": 72520 }, { "epoch": 0.599991727675063, "grad_norm": 670.1849975585938, "learning_rate": 4.371584813785216e-06, "loss": 86.4142, "step": 72530 }, { "epoch": 0.6000744509244323, "grad_norm": 669.2977905273438, "learning_rate": 4.370185655310295e-06, "loss": 116.1969, "step": 72540 }, { "epoch": 0.6001571741738015, "grad_norm": 729.8284912109375, "learning_rate": 4.368786546946811e-06, "loss": 79.6399, "step": 72550 }, { "epoch": 0.6002398974231707, "grad_norm": 600.2257690429688, "learning_rate": 4.367387488806086e-06, "loss": 77.0065, "step": 72560 }, { "epoch": 0.60032262067254, "grad_norm": 879.3544311523438, "learning_rate": 4.365988480999434e-06, "loss": 83.5863, "step": 72570 }, { "epoch": 0.6004053439219093, "grad_norm": 1029.636962890625, "learning_rate": 4.364589523638168e-06, "loss": 103.5862, "step": 72580 }, { "epoch": 0.6004880671712785, "grad_norm": 1054.551513671875, "learning_rate": 4.363190616833598e-06, "loss": 81.133, "step": 72590 }, { "epoch": 0.6005707904206478, "grad_norm": 1133.460693359375, "learning_rate": 4.361791760697027e-06, "loss": 104.2316, "step": 72600 }, { "epoch": 0.600653513670017, "grad_norm": 795.6099243164062, "learning_rate": 4.360392955339758e-06, "loss": 114.3038, "step": 72610 }, { "epoch": 0.6007362369193862, "grad_norm": 892.427001953125, "learning_rate": 4.358994200873085e-06, "loss": 90.4173, "step": 72620 }, { "epoch": 0.6008189601687555, "grad_norm": 511.8495788574219, "learning_rate": 4.357595497408303e-06, "loss": 71.118, "step": 72630 }, { "epoch": 0.6009016834181247, "grad_norm": 1011.441650390625, "learning_rate": 4.356196845056699e-06, "loss": 97.9198, "step": 72640 }, { "epoch": 0.6009844066674939, "grad_norm": 1414.120361328125, "learning_rate": 4.3547982439295576e-06, "loss": 116.4794, "step": 72650 }, { "epoch": 0.6010671299168632, "grad_norm": 1085.852294921875, "learning_rate": 4.353399694138158e-06, "loss": 69.8157, "step": 72660 }, { "epoch": 0.6011498531662324, "grad_norm": 836.6179809570312, "learning_rate": 4.352001195793778e-06, "loss": 68.6478, "step": 72670 }, { "epoch": 0.6012325764156016, "grad_norm": 1110.6588134765625, "learning_rate": 4.350602749007688e-06, "loss": 70.8156, "step": 72680 }, { "epoch": 0.6013152996649709, "grad_norm": 648.1185302734375, "learning_rate": 4.349204353891158e-06, "loss": 92.2575, "step": 72690 }, { "epoch": 0.6013980229143401, "grad_norm": 998.8807373046875, "learning_rate": 4.347806010555448e-06, "loss": 86.0575, "step": 72700 }, { "epoch": 0.6014807461637093, "grad_norm": 579.22265625, "learning_rate": 4.346407719111823e-06, "loss": 122.89, "step": 72710 }, { "epoch": 0.6015634694130786, "grad_norm": 1019.9585571289062, "learning_rate": 4.3450094796715354e-06, "loss": 119.5738, "step": 72720 }, { "epoch": 0.6016461926624478, "grad_norm": 903.5271606445312, "learning_rate": 4.343611292345839e-06, "loss": 107.9593, "step": 72730 }, { "epoch": 0.601728915911817, "grad_norm": 816.7589111328125, "learning_rate": 4.342213157245979e-06, "loss": 84.478, "step": 72740 }, { "epoch": 0.6018116391611863, "grad_norm": 550.0929565429688, "learning_rate": 4.340815074483199e-06, "loss": 69.082, "step": 72750 }, { "epoch": 0.6018943624105555, "grad_norm": 576.094482421875, "learning_rate": 4.339417044168738e-06, "loss": 81.2501, "step": 72760 }, { "epoch": 0.6019770856599247, "grad_norm": 1026.7596435546875, "learning_rate": 4.338019066413832e-06, "loss": 96.2626, "step": 72770 }, { "epoch": 0.602059808909294, "grad_norm": 739.3609008789062, "learning_rate": 4.33662114132971e-06, "loss": 80.703, "step": 72780 }, { "epoch": 0.6021425321586632, "grad_norm": 1358.84716796875, "learning_rate": 4.335223269027599e-06, "loss": 113.0873, "step": 72790 }, { "epoch": 0.6022252554080324, "grad_norm": 932.6506958007812, "learning_rate": 4.333825449618721e-06, "loss": 76.9534, "step": 72800 }, { "epoch": 0.6023079786574017, "grad_norm": 744.7998046875, "learning_rate": 4.332427683214295e-06, "loss": 95.1433, "step": 72810 }, { "epoch": 0.6023907019067709, "grad_norm": 856.265380859375, "learning_rate": 4.331029969925538e-06, "loss": 69.2679, "step": 72820 }, { "epoch": 0.6024734251561401, "grad_norm": 1203.6240234375, "learning_rate": 4.329632309863652e-06, "loss": 75.6211, "step": 72830 }, { "epoch": 0.6025561484055094, "grad_norm": 814.6486206054688, "learning_rate": 4.328234703139847e-06, "loss": 83.6226, "step": 72840 }, { "epoch": 0.6026388716548786, "grad_norm": 1125.9552001953125, "learning_rate": 4.326837149865325e-06, "loss": 120.5001, "step": 72850 }, { "epoch": 0.6027215949042478, "grad_norm": 682.9524536132812, "learning_rate": 4.325439650151281e-06, "loss": 59.8708, "step": 72860 }, { "epoch": 0.6028043181536171, "grad_norm": 1073.811279296875, "learning_rate": 4.324042204108908e-06, "loss": 79.1899, "step": 72870 }, { "epoch": 0.6028870414029863, "grad_norm": 764.7388305664062, "learning_rate": 4.322644811849395e-06, "loss": 75.46, "step": 72880 }, { "epoch": 0.6029697646523555, "grad_norm": 827.508056640625, "learning_rate": 4.321247473483924e-06, "loss": 73.4172, "step": 72890 }, { "epoch": 0.6030524879017248, "grad_norm": 989.9174194335938, "learning_rate": 4.319850189123681e-06, "loss": 79.5693, "step": 72900 }, { "epoch": 0.603135211151094, "grad_norm": 1122.249267578125, "learning_rate": 4.3184529588798335e-06, "loss": 98.8418, "step": 72910 }, { "epoch": 0.6032179344004632, "grad_norm": 922.3504028320312, "learning_rate": 4.3170557828635565e-06, "loss": 139.0302, "step": 72920 }, { "epoch": 0.6033006576498325, "grad_norm": 818.5303955078125, "learning_rate": 4.315658661186016e-06, "loss": 77.4534, "step": 72930 }, { "epoch": 0.6033833808992017, "grad_norm": 795.8893432617188, "learning_rate": 4.314261593958376e-06, "loss": 88.9662, "step": 72940 }, { "epoch": 0.6034661041485709, "grad_norm": 891.454833984375, "learning_rate": 4.3128645812917935e-06, "loss": 83.1203, "step": 72950 }, { "epoch": 0.6035488273979402, "grad_norm": 707.6808471679688, "learning_rate": 4.311467623297423e-06, "loss": 88.5659, "step": 72960 }, { "epoch": 0.6036315506473094, "grad_norm": 584.8573608398438, "learning_rate": 4.310070720086414e-06, "loss": 80.7194, "step": 72970 }, { "epoch": 0.6037142738966786, "grad_norm": 665.1079711914062, "learning_rate": 4.30867387176991e-06, "loss": 73.1562, "step": 72980 }, { "epoch": 0.6037969971460478, "grad_norm": 1153.0792236328125, "learning_rate": 4.307277078459057e-06, "loss": 89.2579, "step": 72990 }, { "epoch": 0.6038797203954172, "grad_norm": 922.5089721679688, "learning_rate": 4.305880340264985e-06, "loss": 100.4802, "step": 73000 }, { "epoch": 0.6039624436447864, "grad_norm": 940.73681640625, "learning_rate": 4.3044836572988285e-06, "loss": 78.375, "step": 73010 }, { "epoch": 0.6040451668941555, "grad_norm": 867.5308227539062, "learning_rate": 4.3030870296717155e-06, "loss": 87.0764, "step": 73020 }, { "epoch": 0.6041278901435249, "grad_norm": 1026.0672607421875, "learning_rate": 4.301690457494769e-06, "loss": 99.893, "step": 73030 }, { "epoch": 0.6042106133928941, "grad_norm": 835.3468017578125, "learning_rate": 4.300293940879108e-06, "loss": 67.1481, "step": 73040 }, { "epoch": 0.6042933366422633, "grad_norm": 1170.596923828125, "learning_rate": 4.298897479935847e-06, "loss": 110.3714, "step": 73050 }, { "epoch": 0.6043760598916326, "grad_norm": 505.8780822753906, "learning_rate": 4.297501074776097e-06, "loss": 94.121, "step": 73060 }, { "epoch": 0.6044587831410018, "grad_norm": 674.9179077148438, "learning_rate": 4.296104725510961e-06, "loss": 79.6617, "step": 73070 }, { "epoch": 0.604541506390371, "grad_norm": 982.7257690429688, "learning_rate": 4.294708432251544e-06, "loss": 82.3634, "step": 73080 }, { "epoch": 0.6046242296397403, "grad_norm": 908.1831665039062, "learning_rate": 4.293312195108938e-06, "loss": 96.1006, "step": 73090 }, { "epoch": 0.6047069528891095, "grad_norm": 594.184814453125, "learning_rate": 4.291916014194238e-06, "loss": 87.1127, "step": 73100 }, { "epoch": 0.6047896761384787, "grad_norm": 1231.35107421875, "learning_rate": 4.290519889618531e-06, "loss": 89.3334, "step": 73110 }, { "epoch": 0.604872399387848, "grad_norm": 1115.8880615234375, "learning_rate": 4.2891238214928995e-06, "loss": 65.0904, "step": 73120 }, { "epoch": 0.6049551226372172, "grad_norm": 802.1785278320312, "learning_rate": 4.287727809928423e-06, "loss": 83.0461, "step": 73130 }, { "epoch": 0.6050378458865864, "grad_norm": 779.43896484375, "learning_rate": 4.286331855036177e-06, "loss": 94.5422, "step": 73140 }, { "epoch": 0.6051205691359557, "grad_norm": 808.46337890625, "learning_rate": 4.284935956927229e-06, "loss": 96.0789, "step": 73150 }, { "epoch": 0.6052032923853249, "grad_norm": 1208.354736328125, "learning_rate": 4.283540115712647e-06, "loss": 92.2509, "step": 73160 }, { "epoch": 0.6052860156346941, "grad_norm": 839.9061889648438, "learning_rate": 4.282144331503488e-06, "loss": 89.2915, "step": 73170 }, { "epoch": 0.6053687388840634, "grad_norm": 876.1185913085938, "learning_rate": 4.280748604410811e-06, "loss": 70.9787, "step": 73180 }, { "epoch": 0.6054514621334326, "grad_norm": 6405.99951171875, "learning_rate": 4.279352934545666e-06, "loss": 128.2961, "step": 73190 }, { "epoch": 0.6055341853828018, "grad_norm": 900.7869873046875, "learning_rate": 4.277957322019101e-06, "loss": 131.7455, "step": 73200 }, { "epoch": 0.6056169086321711, "grad_norm": 611.6780395507812, "learning_rate": 4.276561766942158e-06, "loss": 79.8027, "step": 73210 }, { "epoch": 0.6056996318815403, "grad_norm": 584.4486083984375, "learning_rate": 4.275166269425874e-06, "loss": 94.241, "step": 73220 }, { "epoch": 0.6057823551309095, "grad_norm": 680.3448486328125, "learning_rate": 4.273770829581285e-06, "loss": 82.9358, "step": 73230 }, { "epoch": 0.6058650783802788, "grad_norm": 1168.8824462890625, "learning_rate": 4.272375447519418e-06, "loss": 84.9492, "step": 73240 }, { "epoch": 0.605947801629648, "grad_norm": 1853.1300048828125, "learning_rate": 4.270980123351299e-06, "loss": 88.8922, "step": 73250 }, { "epoch": 0.6060305248790172, "grad_norm": 588.9846801757812, "learning_rate": 4.269584857187942e-06, "loss": 86.5293, "step": 73260 }, { "epoch": 0.6061132481283865, "grad_norm": 684.2811889648438, "learning_rate": 4.268189649140369e-06, "loss": 75.5877, "step": 73270 }, { "epoch": 0.6061959713777557, "grad_norm": 751.708251953125, "learning_rate": 4.266794499319585e-06, "loss": 94.9124, "step": 73280 }, { "epoch": 0.6062786946271249, "grad_norm": 709.6283569335938, "learning_rate": 4.265399407836598e-06, "loss": 88.3451, "step": 73290 }, { "epoch": 0.6063614178764942, "grad_norm": 558.1085815429688, "learning_rate": 4.26400437480241e-06, "loss": 131.0363, "step": 73300 }, { "epoch": 0.6064441411258634, "grad_norm": 655.4963989257812, "learning_rate": 4.262609400328015e-06, "loss": 95.9021, "step": 73310 }, { "epoch": 0.6065268643752326, "grad_norm": 762.81689453125, "learning_rate": 4.2612144845244044e-06, "loss": 92.3398, "step": 73320 }, { "epoch": 0.6066095876246019, "grad_norm": 1324.61962890625, "learning_rate": 4.259819627502571e-06, "loss": 86.3487, "step": 73330 }, { "epoch": 0.6066923108739711, "grad_norm": 915.4453125, "learning_rate": 4.258424829373491e-06, "loss": 95.3454, "step": 73340 }, { "epoch": 0.6067750341233403, "grad_norm": 1026.8973388671875, "learning_rate": 4.257030090248142e-06, "loss": 121.2151, "step": 73350 }, { "epoch": 0.6068577573727096, "grad_norm": 995.72412109375, "learning_rate": 4.2556354102374994e-06, "loss": 93.051, "step": 73360 }, { "epoch": 0.6069404806220788, "grad_norm": 1735.9681396484375, "learning_rate": 4.254240789452532e-06, "loss": 123.005, "step": 73370 }, { "epoch": 0.607023203871448, "grad_norm": 819.5543212890625, "learning_rate": 4.252846228004203e-06, "loss": 89.1001, "step": 73380 }, { "epoch": 0.6071059271208173, "grad_norm": 981.048828125, "learning_rate": 4.25145172600347e-06, "loss": 94.8838, "step": 73390 }, { "epoch": 0.6071886503701865, "grad_norm": 996.7430419921875, "learning_rate": 4.2500572835612876e-06, "loss": 130.0341, "step": 73400 }, { "epoch": 0.6072713736195557, "grad_norm": 890.7987060546875, "learning_rate": 4.248662900788605e-06, "loss": 95.0139, "step": 73410 }, { "epoch": 0.607354096868925, "grad_norm": 766.2911376953125, "learning_rate": 4.247268577796368e-06, "loss": 61.7691, "step": 73420 }, { "epoch": 0.6074368201182943, "grad_norm": 953.1488037109375, "learning_rate": 4.245874314695516e-06, "loss": 146.5053, "step": 73430 }, { "epoch": 0.6075195433676635, "grad_norm": 1011.7604370117188, "learning_rate": 4.244480111596984e-06, "loss": 85.3021, "step": 73440 }, { "epoch": 0.6076022666170328, "grad_norm": 575.0761108398438, "learning_rate": 4.2430859686117e-06, "loss": 95.1132, "step": 73450 }, { "epoch": 0.607684989866402, "grad_norm": 753.287353515625, "learning_rate": 4.241691885850593e-06, "loss": 75.5522, "step": 73460 }, { "epoch": 0.6077677131157712, "grad_norm": 2446.799072265625, "learning_rate": 4.240297863424582e-06, "loss": 84.9961, "step": 73470 }, { "epoch": 0.6078504363651405, "grad_norm": 1110.462158203125, "learning_rate": 4.2389039014445846e-06, "loss": 92.255, "step": 73480 }, { "epoch": 0.6079331596145097, "grad_norm": 927.8515014648438, "learning_rate": 4.23751000002151e-06, "loss": 101.0412, "step": 73490 }, { "epoch": 0.6080158828638789, "grad_norm": 1213.8529052734375, "learning_rate": 4.2361161592662655e-06, "loss": 91.7586, "step": 73500 }, { "epoch": 0.6080986061132482, "grad_norm": 960.1309814453125, "learning_rate": 4.234722379289753e-06, "loss": 129.1659, "step": 73510 }, { "epoch": 0.6081813293626174, "grad_norm": 704.3455810546875, "learning_rate": 4.233328660202869e-06, "loss": 67.6366, "step": 73520 }, { "epoch": 0.6082640526119866, "grad_norm": 843.2509765625, "learning_rate": 4.231935002116504e-06, "loss": 78.1124, "step": 73530 }, { "epoch": 0.6083467758613559, "grad_norm": 598.6126708984375, "learning_rate": 4.230541405141546e-06, "loss": 81.4852, "step": 73540 }, { "epoch": 0.6084294991107251, "grad_norm": 1050.9449462890625, "learning_rate": 4.229147869388875e-06, "loss": 94.9926, "step": 73550 }, { "epoch": 0.6085122223600943, "grad_norm": 839.3172607421875, "learning_rate": 4.227754394969373e-06, "loss": 104.2477, "step": 73560 }, { "epoch": 0.6085949456094636, "grad_norm": 916.7781372070312, "learning_rate": 4.226360981993909e-06, "loss": 104.4644, "step": 73570 }, { "epoch": 0.6086776688588328, "grad_norm": 1136.1072998046875, "learning_rate": 4.224967630573351e-06, "loss": 97.9894, "step": 73580 }, { "epoch": 0.608760392108202, "grad_norm": 1269.821044921875, "learning_rate": 4.2235743408185635e-06, "loss": 80.4433, "step": 73590 }, { "epoch": 0.6088431153575713, "grad_norm": 922.0454711914062, "learning_rate": 4.222181112840401e-06, "loss": 86.2411, "step": 73600 }, { "epoch": 0.6089258386069405, "grad_norm": 841.0752563476562, "learning_rate": 4.220787946749717e-06, "loss": 89.3184, "step": 73610 }, { "epoch": 0.6090085618563097, "grad_norm": 681.7354736328125, "learning_rate": 4.219394842657361e-06, "loss": 86.7437, "step": 73620 }, { "epoch": 0.609091285105679, "grad_norm": 935.4651489257812, "learning_rate": 4.218001800674174e-06, "loss": 88.4934, "step": 73630 }, { "epoch": 0.6091740083550482, "grad_norm": 675.4620971679688, "learning_rate": 4.216608820910995e-06, "loss": 67.0971, "step": 73640 }, { "epoch": 0.6092567316044174, "grad_norm": 710.9379272460938, "learning_rate": 4.2152159034786554e-06, "loss": 62.8751, "step": 73650 }, { "epoch": 0.6093394548537867, "grad_norm": 829.56103515625, "learning_rate": 4.213823048487987e-06, "loss": 108.2473, "step": 73660 }, { "epoch": 0.6094221781031559, "grad_norm": 757.4158935546875, "learning_rate": 4.212430256049809e-06, "loss": 99.3676, "step": 73670 }, { "epoch": 0.6095049013525251, "grad_norm": 564.0368041992188, "learning_rate": 4.2110375262749435e-06, "loss": 86.9687, "step": 73680 }, { "epoch": 0.6095876246018944, "grad_norm": 513.57470703125, "learning_rate": 4.209644859274199e-06, "loss": 70.3236, "step": 73690 }, { "epoch": 0.6096703478512636, "grad_norm": 1249.1611328125, "learning_rate": 4.208252255158387e-06, "loss": 94.686, "step": 73700 }, { "epoch": 0.6097530711006328, "grad_norm": 1189.990478515625, "learning_rate": 4.2068597140383084e-06, "loss": 114.4235, "step": 73710 }, { "epoch": 0.609835794350002, "grad_norm": 561.5841674804688, "learning_rate": 4.205467236024763e-06, "loss": 97.9067, "step": 73720 }, { "epoch": 0.6099185175993713, "grad_norm": 1291.4609375, "learning_rate": 4.204074821228542e-06, "loss": 97.8942, "step": 73730 }, { "epoch": 0.6100012408487405, "grad_norm": 960.28076171875, "learning_rate": 4.202682469760436e-06, "loss": 82.6604, "step": 73740 }, { "epoch": 0.6100839640981097, "grad_norm": 1440.652587890625, "learning_rate": 4.2012901817312255e-06, "loss": 116.6677, "step": 73750 }, { "epoch": 0.610166687347479, "grad_norm": 665.2211303710938, "learning_rate": 4.199897957251693e-06, "loss": 82.3861, "step": 73760 }, { "epoch": 0.6102494105968482, "grad_norm": 2500.0634765625, "learning_rate": 4.198505796432605e-06, "loss": 163.031, "step": 73770 }, { "epoch": 0.6103321338462174, "grad_norm": 365.54327392578125, "learning_rate": 4.197113699384732e-06, "loss": 84.3422, "step": 73780 }, { "epoch": 0.6104148570955867, "grad_norm": 1687.4744873046875, "learning_rate": 4.1957216662188385e-06, "loss": 114.6087, "step": 73790 }, { "epoch": 0.6104975803449559, "grad_norm": 1095.3680419921875, "learning_rate": 4.194329697045681e-06, "loss": 79.8403, "step": 73800 }, { "epoch": 0.6105803035943251, "grad_norm": 939.5096435546875, "learning_rate": 4.19293779197601e-06, "loss": 76.4956, "step": 73810 }, { "epoch": 0.6106630268436944, "grad_norm": 1039.002197265625, "learning_rate": 4.191545951120577e-06, "loss": 127.6832, "step": 73820 }, { "epoch": 0.6107457500930636, "grad_norm": 605.078857421875, "learning_rate": 4.190154174590122e-06, "loss": 107.0245, "step": 73830 }, { "epoch": 0.6108284733424328, "grad_norm": 1389.932373046875, "learning_rate": 4.188762462495381e-06, "loss": 127.3608, "step": 73840 }, { "epoch": 0.6109111965918022, "grad_norm": 716.9305419921875, "learning_rate": 4.187370814947091e-06, "loss": 74.3108, "step": 73850 }, { "epoch": 0.6109939198411714, "grad_norm": 835.6632690429688, "learning_rate": 4.185979232055975e-06, "loss": 104.9701, "step": 73860 }, { "epoch": 0.6110766430905405, "grad_norm": 960.3486938476562, "learning_rate": 4.184587713932755e-06, "loss": 118.767, "step": 73870 }, { "epoch": 0.6111593663399099, "grad_norm": 811.2726440429688, "learning_rate": 4.183196260688147e-06, "loss": 83.4491, "step": 73880 }, { "epoch": 0.6112420895892791, "grad_norm": 1344.4681396484375, "learning_rate": 4.1818048724328646e-06, "loss": 99.5997, "step": 73890 }, { "epoch": 0.6113248128386483, "grad_norm": 1349.948486328125, "learning_rate": 4.180413549277614e-06, "loss": 101.2212, "step": 73900 }, { "epoch": 0.6114075360880176, "grad_norm": 646.7472534179688, "learning_rate": 4.1790222913330955e-06, "loss": 89.3753, "step": 73910 }, { "epoch": 0.6114902593373868, "grad_norm": 920.3361206054688, "learning_rate": 4.1776310987100054e-06, "loss": 97.7225, "step": 73920 }, { "epoch": 0.611572982586756, "grad_norm": 1254.9158935546875, "learning_rate": 4.1762399715190366e-06, "loss": 104.7424, "step": 73930 }, { "epoch": 0.6116557058361253, "grad_norm": 821.2960815429688, "learning_rate": 4.1748489098708715e-06, "loss": 106.9926, "step": 73940 }, { "epoch": 0.6117384290854945, "grad_norm": 716.6043701171875, "learning_rate": 4.173457913876191e-06, "loss": 92.9678, "step": 73950 }, { "epoch": 0.6118211523348637, "grad_norm": 1183.022216796875, "learning_rate": 4.172066983645671e-06, "loss": 117.3768, "step": 73960 }, { "epoch": 0.611903875584233, "grad_norm": 836.3502807617188, "learning_rate": 4.170676119289982e-06, "loss": 81.8189, "step": 73970 }, { "epoch": 0.6119865988336022, "grad_norm": 491.899658203125, "learning_rate": 4.1692853209197865e-06, "loss": 81.1659, "step": 73980 }, { "epoch": 0.6120693220829714, "grad_norm": 1108.9029541015625, "learning_rate": 4.167894588645746e-06, "loss": 63.8417, "step": 73990 }, { "epoch": 0.6121520453323407, "grad_norm": 1243.21533203125, "learning_rate": 4.166503922578516e-06, "loss": 71.4258, "step": 74000 }, { "epoch": 0.6122347685817099, "grad_norm": 1081.2105712890625, "learning_rate": 4.165113322828743e-06, "loss": 110.2544, "step": 74010 }, { "epoch": 0.6123174918310791, "grad_norm": 857.2208862304688, "learning_rate": 4.163722789507071e-06, "loss": 97.8825, "step": 74020 }, { "epoch": 0.6124002150804484, "grad_norm": 1000.3453369140625, "learning_rate": 4.162332322724139e-06, "loss": 64.7686, "step": 74030 }, { "epoch": 0.6124829383298176, "grad_norm": 536.7930297851562, "learning_rate": 4.16094192259058e-06, "loss": 82.5932, "step": 74040 }, { "epoch": 0.6125656615791868, "grad_norm": 1039.748779296875, "learning_rate": 4.15955158921702e-06, "loss": 94.5069, "step": 74050 }, { "epoch": 0.6126483848285561, "grad_norm": 903.4092407226562, "learning_rate": 4.158161322714085e-06, "loss": 93.7216, "step": 74060 }, { "epoch": 0.6127311080779253, "grad_norm": 1123.583740234375, "learning_rate": 4.1567711231923876e-06, "loss": 97.1475, "step": 74070 }, { "epoch": 0.6128138313272945, "grad_norm": 614.0092163085938, "learning_rate": 4.155380990762542e-06, "loss": 69.82, "step": 74080 }, { "epoch": 0.6128965545766638, "grad_norm": 436.3868103027344, "learning_rate": 4.153990925535157e-06, "loss": 85.2651, "step": 74090 }, { "epoch": 0.612979277826033, "grad_norm": 1019.7743530273438, "learning_rate": 4.15260092762083e-06, "loss": 68.9177, "step": 74100 }, { "epoch": 0.6130620010754022, "grad_norm": 1233.597412109375, "learning_rate": 4.151210997130159e-06, "loss": 126.1689, "step": 74110 }, { "epoch": 0.6131447243247715, "grad_norm": 1863.585205078125, "learning_rate": 4.1498211341737335e-06, "loss": 109.2528, "step": 74120 }, { "epoch": 0.6132274475741407, "grad_norm": 603.75634765625, "learning_rate": 4.148431338862138e-06, "loss": 83.3835, "step": 74130 }, { "epoch": 0.6133101708235099, "grad_norm": 1063.5419921875, "learning_rate": 4.147041611305952e-06, "loss": 81.7966, "step": 74140 }, { "epoch": 0.6133928940728792, "grad_norm": 553.776123046875, "learning_rate": 4.145651951615752e-06, "loss": 103.0459, "step": 74150 }, { "epoch": 0.6134756173222484, "grad_norm": 783.0980834960938, "learning_rate": 4.144262359902104e-06, "loss": 88.7107, "step": 74160 }, { "epoch": 0.6135583405716176, "grad_norm": 1454.119873046875, "learning_rate": 4.142872836275572e-06, "loss": 95.9363, "step": 74170 }, { "epoch": 0.6136410638209869, "grad_norm": 1249.62548828125, "learning_rate": 4.141483380846716e-06, "loss": 85.594, "step": 74180 }, { "epoch": 0.6137237870703561, "grad_norm": 1073.4764404296875, "learning_rate": 4.1400939937260894e-06, "loss": 78.375, "step": 74190 }, { "epoch": 0.6138065103197253, "grad_norm": 762.4814453125, "learning_rate": 4.138704675024235e-06, "loss": 87.9466, "step": 74200 }, { "epoch": 0.6138892335690946, "grad_norm": 1005.1492919921875, "learning_rate": 4.137315424851696e-06, "loss": 99.7527, "step": 74210 }, { "epoch": 0.6139719568184638, "grad_norm": 892.3167114257812, "learning_rate": 4.1359262433190105e-06, "loss": 88.9903, "step": 74220 }, { "epoch": 0.614054680067833, "grad_norm": 736.7813110351562, "learning_rate": 4.134537130536708e-06, "loss": 72.6644, "step": 74230 }, { "epoch": 0.6141374033172023, "grad_norm": 2169.072998046875, "learning_rate": 4.133148086615314e-06, "loss": 91.2334, "step": 74240 }, { "epoch": 0.6142201265665715, "grad_norm": 918.8009643554688, "learning_rate": 4.131759111665349e-06, "loss": 88.9064, "step": 74250 }, { "epoch": 0.6143028498159407, "grad_norm": 991.6587524414062, "learning_rate": 4.130370205797326e-06, "loss": 126.6198, "step": 74260 }, { "epoch": 0.61438557306531, "grad_norm": 626.6166381835938, "learning_rate": 4.128981369121754e-06, "loss": 101.1643, "step": 74270 }, { "epoch": 0.6144682963146793, "grad_norm": 1045.540283203125, "learning_rate": 4.127592601749141e-06, "loss": 100.4899, "step": 74280 }, { "epoch": 0.6145510195640485, "grad_norm": 948.6563720703125, "learning_rate": 4.1262039037899775e-06, "loss": 92.1499, "step": 74290 }, { "epoch": 0.6146337428134178, "grad_norm": 672.4278564453125, "learning_rate": 4.12481527535476e-06, "loss": 90.7191, "step": 74300 }, { "epoch": 0.614716466062787, "grad_norm": 1149.4989013671875, "learning_rate": 4.123426716553972e-06, "loss": 97.7278, "step": 74310 }, { "epoch": 0.6147991893121562, "grad_norm": 1198.019775390625, "learning_rate": 4.122038227498101e-06, "loss": 84.5663, "step": 74320 }, { "epoch": 0.6148819125615255, "grad_norm": 1210.01611328125, "learning_rate": 4.120649808297616e-06, "loss": 116.1529, "step": 74330 }, { "epoch": 0.6149646358108947, "grad_norm": 815.8023681640625, "learning_rate": 4.119261459062992e-06, "loss": 94.437, "step": 74340 }, { "epoch": 0.6150473590602639, "grad_norm": 616.3191528320312, "learning_rate": 4.1178731799046915e-06, "loss": 110.6811, "step": 74350 }, { "epoch": 0.6151300823096332, "grad_norm": 938.7825927734375, "learning_rate": 4.116484970933174e-06, "loss": 85.8687, "step": 74360 }, { "epoch": 0.6152128055590024, "grad_norm": 754.9133911132812, "learning_rate": 4.1150968322588915e-06, "loss": 85.892, "step": 74370 }, { "epoch": 0.6152955288083716, "grad_norm": 982.1900024414062, "learning_rate": 4.113708763992294e-06, "loss": 98.6362, "step": 74380 }, { "epoch": 0.6153782520577409, "grad_norm": 1620.11767578125, "learning_rate": 4.1123207662438216e-06, "loss": 99.9468, "step": 74390 }, { "epoch": 0.6154609753071101, "grad_norm": 843.5370483398438, "learning_rate": 4.110932839123911e-06, "loss": 102.8236, "step": 74400 }, { "epoch": 0.6155436985564793, "grad_norm": 1027.692138671875, "learning_rate": 4.109544982742995e-06, "loss": 87.6993, "step": 74410 }, { "epoch": 0.6156264218058485, "grad_norm": 922.636962890625, "learning_rate": 4.108157197211499e-06, "loss": 91.7235, "step": 74420 }, { "epoch": 0.6157091450552178, "grad_norm": 592.3994750976562, "learning_rate": 4.1067694826398405e-06, "loss": 106.6859, "step": 74430 }, { "epoch": 0.615791868304587, "grad_norm": 576.2105102539062, "learning_rate": 4.105381839138436e-06, "loss": 67.9388, "step": 74440 }, { "epoch": 0.6158745915539562, "grad_norm": 1076.5267333984375, "learning_rate": 4.103994266817694e-06, "loss": 81.3077, "step": 74450 }, { "epoch": 0.6159573148033255, "grad_norm": 1463.522216796875, "learning_rate": 4.102606765788014e-06, "loss": 121.3816, "step": 74460 }, { "epoch": 0.6160400380526947, "grad_norm": 523.5887451171875, "learning_rate": 4.101219336159795e-06, "loss": 96.6892, "step": 74470 }, { "epoch": 0.6161227613020639, "grad_norm": 801.384765625, "learning_rate": 4.09983197804343e-06, "loss": 90.4624, "step": 74480 }, { "epoch": 0.6162054845514332, "grad_norm": 815.4998168945312, "learning_rate": 4.098444691549302e-06, "loss": 85.0159, "step": 74490 }, { "epoch": 0.6162882078008024, "grad_norm": 1301.9669189453125, "learning_rate": 4.097057476787792e-06, "loss": 101.675, "step": 74500 }, { "epoch": 0.6163709310501716, "grad_norm": 2187.890869140625, "learning_rate": 4.0956703338692755e-06, "loss": 93.8541, "step": 74510 }, { "epoch": 0.6164536542995409, "grad_norm": 2067.32470703125, "learning_rate": 4.09428326290412e-06, "loss": 100.0631, "step": 74520 }, { "epoch": 0.6165363775489101, "grad_norm": 1015.9877319335938, "learning_rate": 4.092896264002689e-06, "loss": 88.7052, "step": 74530 }, { "epoch": 0.6166191007982793, "grad_norm": 1361.1781005859375, "learning_rate": 4.09150933727534e-06, "loss": 98.1841, "step": 74540 }, { "epoch": 0.6167018240476486, "grad_norm": 1028.168701171875, "learning_rate": 4.0901224828324225e-06, "loss": 92.4252, "step": 74550 }, { "epoch": 0.6167845472970178, "grad_norm": 841.4468383789062, "learning_rate": 4.088735700784283e-06, "loss": 96.4391, "step": 74560 }, { "epoch": 0.616867270546387, "grad_norm": 941.0628662109375, "learning_rate": 4.087348991241262e-06, "loss": 79.4249, "step": 74570 }, { "epoch": 0.6169499937957563, "grad_norm": 550.6486206054688, "learning_rate": 4.0859623543136935e-06, "loss": 71.2127, "step": 74580 }, { "epoch": 0.6170327170451255, "grad_norm": 396.369140625, "learning_rate": 4.084575790111905e-06, "loss": 119.8758, "step": 74590 }, { "epoch": 0.6171154402944947, "grad_norm": 714.75634765625, "learning_rate": 4.08318929874622e-06, "loss": 97.5772, "step": 74600 }, { "epoch": 0.617198163543864, "grad_norm": 1025.8707275390625, "learning_rate": 4.081802880326955e-06, "loss": 109.0059, "step": 74610 }, { "epoch": 0.6172808867932332, "grad_norm": 819.5011596679688, "learning_rate": 4.080416534964422e-06, "loss": 106.0014, "step": 74620 }, { "epoch": 0.6173636100426024, "grad_norm": 628.18359375, "learning_rate": 4.079030262768924e-06, "loss": 99.0313, "step": 74630 }, { "epoch": 0.6174463332919717, "grad_norm": 1026.5445556640625, "learning_rate": 4.077644063850761e-06, "loss": 106.1461, "step": 74640 }, { "epoch": 0.6175290565413409, "grad_norm": 479.8872985839844, "learning_rate": 4.076257938320226e-06, "loss": 77.8964, "step": 74650 }, { "epoch": 0.6176117797907101, "grad_norm": 782.7094116210938, "learning_rate": 4.074871886287609e-06, "loss": 123.7894, "step": 74660 }, { "epoch": 0.6176945030400794, "grad_norm": 708.5579833984375, "learning_rate": 4.073485907863189e-06, "loss": 62.82, "step": 74670 }, { "epoch": 0.6177772262894486, "grad_norm": 1126.7977294921875, "learning_rate": 4.0721000031572445e-06, "loss": 85.1428, "step": 74680 }, { "epoch": 0.6178599495388178, "grad_norm": 664.0321655273438, "learning_rate": 4.070714172280043e-06, "loss": 87.29, "step": 74690 }, { "epoch": 0.6179426727881872, "grad_norm": 716.9480590820312, "learning_rate": 4.06932841534185e-06, "loss": 85.6546, "step": 74700 }, { "epoch": 0.6180253960375564, "grad_norm": 894.58935546875, "learning_rate": 4.067942732452926e-06, "loss": 98.9088, "step": 74710 }, { "epoch": 0.6181081192869256, "grad_norm": 844.1010131835938, "learning_rate": 4.06655712372352e-06, "loss": 114.6562, "step": 74720 }, { "epoch": 0.6181908425362949, "grad_norm": 735.4295654296875, "learning_rate": 4.065171589263878e-06, "loss": 87.0721, "step": 74730 }, { "epoch": 0.6182735657856641, "grad_norm": 758.739990234375, "learning_rate": 4.063786129184243e-06, "loss": 108.7126, "step": 74740 }, { "epoch": 0.6183562890350333, "grad_norm": 1705.91064453125, "learning_rate": 4.06240074359485e-06, "loss": 130.587, "step": 74750 }, { "epoch": 0.6184390122844026, "grad_norm": 498.7382507324219, "learning_rate": 4.061015432605927e-06, "loss": 76.9041, "step": 74760 }, { "epoch": 0.6185217355337718, "grad_norm": 610.8711547851562, "learning_rate": 4.059630196327696e-06, "loss": 93.8699, "step": 74770 }, { "epoch": 0.618604458783141, "grad_norm": 850.5593872070312, "learning_rate": 4.058245034870375e-06, "loss": 84.9047, "step": 74780 }, { "epoch": 0.6186871820325103, "grad_norm": 833.0304565429688, "learning_rate": 4.056859948344175e-06, "loss": 71.8329, "step": 74790 }, { "epoch": 0.6187699052818795, "grad_norm": 758.42236328125, "learning_rate": 4.0554749368593e-06, "loss": 69.1855, "step": 74800 }, { "epoch": 0.6188526285312487, "grad_norm": 853.135009765625, "learning_rate": 4.054090000525949e-06, "loss": 74.7512, "step": 74810 }, { "epoch": 0.618935351780618, "grad_norm": 817.5387573242188, "learning_rate": 4.052705139454316e-06, "loss": 116.5073, "step": 74820 }, { "epoch": 0.6190180750299872, "grad_norm": 1293.7938232421875, "learning_rate": 4.051320353754586e-06, "loss": 102.931, "step": 74830 }, { "epoch": 0.6191007982793564, "grad_norm": 931.7042236328125, "learning_rate": 4.049935643536943e-06, "loss": 96.7613, "step": 74840 }, { "epoch": 0.6191835215287257, "grad_norm": 495.9801940917969, "learning_rate": 4.048551008911561e-06, "loss": 89.8051, "step": 74850 }, { "epoch": 0.6192662447780949, "grad_norm": 587.1924438476562, "learning_rate": 4.0471664499886074e-06, "loss": 116.4916, "step": 74860 }, { "epoch": 0.6193489680274641, "grad_norm": 769.1555786132812, "learning_rate": 4.045781966878247e-06, "loss": 85.5647, "step": 74870 }, { "epoch": 0.6194316912768334, "grad_norm": 1216.7843017578125, "learning_rate": 4.044397559690638e-06, "loss": 108.6549, "step": 74880 }, { "epoch": 0.6195144145262026, "grad_norm": 764.6852416992188, "learning_rate": 4.043013228535928e-06, "loss": 113.6466, "step": 74890 }, { "epoch": 0.6195971377755718, "grad_norm": 934.8550415039062, "learning_rate": 4.041628973524264e-06, "loss": 130.2357, "step": 74900 }, { "epoch": 0.6196798610249411, "grad_norm": 377.97088623046875, "learning_rate": 4.040244794765783e-06, "loss": 85.8722, "step": 74910 }, { "epoch": 0.6197625842743103, "grad_norm": 1394.6834716796875, "learning_rate": 4.03886069237062e-06, "loss": 91.4125, "step": 74920 }, { "epoch": 0.6198453075236795, "grad_norm": 595.1375122070312, "learning_rate": 4.037476666448899e-06, "loss": 71.7473, "step": 74930 }, { "epoch": 0.6199280307730488, "grad_norm": 595.4174194335938, "learning_rate": 4.0360927171107436e-06, "loss": 93.8769, "step": 74940 }, { "epoch": 0.620010754022418, "grad_norm": 718.7639770507812, "learning_rate": 4.034708844466267e-06, "loss": 102.3341, "step": 74950 }, { "epoch": 0.6200934772717872, "grad_norm": 968.3093872070312, "learning_rate": 4.033325048625578e-06, "loss": 90.3649, "step": 74960 }, { "epoch": 0.6201762005211565, "grad_norm": 1063.10009765625, "learning_rate": 4.031941329698778e-06, "loss": 104.0913, "step": 74970 }, { "epoch": 0.6202589237705257, "grad_norm": 1573.7166748046875, "learning_rate": 4.030557687795965e-06, "loss": 92.6962, "step": 74980 }, { "epoch": 0.6203416470198949, "grad_norm": 1660.5982666015625, "learning_rate": 4.029174123027226e-06, "loss": 135.9649, "step": 74990 }, { "epoch": 0.6204243702692642, "grad_norm": 1034.1180419921875, "learning_rate": 4.027790635502646e-06, "loss": 94.0106, "step": 75000 }, { "epoch": 0.6205070935186334, "grad_norm": 742.1674194335938, "learning_rate": 4.026407225332305e-06, "loss": 109.5858, "step": 75010 }, { "epoch": 0.6205898167680026, "grad_norm": 586.0728759765625, "learning_rate": 4.025023892626272e-06, "loss": 76.6323, "step": 75020 }, { "epoch": 0.6206725400173719, "grad_norm": 1411.9517822265625, "learning_rate": 4.023640637494612e-06, "loss": 135.056, "step": 75030 }, { "epoch": 0.6207552632667411, "grad_norm": 1079.8941650390625, "learning_rate": 4.022257460047387e-06, "loss": 97.0648, "step": 75040 }, { "epoch": 0.6208379865161103, "grad_norm": 791.2523803710938, "learning_rate": 4.0208743603946505e-06, "loss": 102.2262, "step": 75050 }, { "epoch": 0.6209207097654796, "grad_norm": 1095.387939453125, "learning_rate": 4.0194913386464445e-06, "loss": 95.2964, "step": 75060 }, { "epoch": 0.6210034330148488, "grad_norm": 601.5514526367188, "learning_rate": 4.018108394912814e-06, "loss": 88.666, "step": 75070 }, { "epoch": 0.621086156264218, "grad_norm": 747.2603149414062, "learning_rate": 4.016725529303792e-06, "loss": 84.6649, "step": 75080 }, { "epoch": 0.6211688795135873, "grad_norm": 619.85791015625, "learning_rate": 4.015342741929407e-06, "loss": 73.6812, "step": 75090 }, { "epoch": 0.6212516027629565, "grad_norm": 1243.3499755859375, "learning_rate": 4.013960032899681e-06, "loss": 100.2627, "step": 75100 }, { "epoch": 0.6213343260123257, "grad_norm": 491.8591003417969, "learning_rate": 4.012577402324631e-06, "loss": 99.1271, "step": 75110 }, { "epoch": 0.621417049261695, "grad_norm": 955.7748413085938, "learning_rate": 4.011194850314263e-06, "loss": 87.4493, "step": 75120 }, { "epoch": 0.6214997725110643, "grad_norm": 895.6536254882812, "learning_rate": 4.009812376978585e-06, "loss": 111.0081, "step": 75130 }, { "epoch": 0.6215824957604335, "grad_norm": 513.0172119140625, "learning_rate": 4.0084299824275926e-06, "loss": 62.0584, "step": 75140 }, { "epoch": 0.6216652190098026, "grad_norm": 835.1646118164062, "learning_rate": 4.007047666771274e-06, "loss": 112.1609, "step": 75150 }, { "epoch": 0.621747942259172, "grad_norm": 855.74169921875, "learning_rate": 4.005665430119615e-06, "loss": 89.9893, "step": 75160 }, { "epoch": 0.6218306655085412, "grad_norm": 1189.672119140625, "learning_rate": 4.0042832725825954e-06, "loss": 78.6606, "step": 75170 }, { "epoch": 0.6219133887579104, "grad_norm": 690.7192993164062, "learning_rate": 4.002901194270186e-06, "loss": 82.508, "step": 75180 }, { "epoch": 0.6219961120072797, "grad_norm": 519.3690795898438, "learning_rate": 4.001519195292352e-06, "loss": 74.1029, "step": 75190 }, { "epoch": 0.6220788352566489, "grad_norm": 664.5087890625, "learning_rate": 4.000137275759053e-06, "loss": 99.7511, "step": 75200 }, { "epoch": 0.6221615585060181, "grad_norm": 966.7168579101562, "learning_rate": 3.9987554357802435e-06, "loss": 102.3384, "step": 75210 }, { "epoch": 0.6222442817553874, "grad_norm": 1171.9957275390625, "learning_rate": 3.997373675465869e-06, "loss": 74.9776, "step": 75220 }, { "epoch": 0.6223270050047566, "grad_norm": 534.0394897460938, "learning_rate": 3.995991994925869e-06, "loss": 91.2517, "step": 75230 }, { "epoch": 0.6224097282541258, "grad_norm": 833.4016723632812, "learning_rate": 3.994610394270178e-06, "loss": 112.876, "step": 75240 }, { "epoch": 0.6224924515034951, "grad_norm": 833.5524291992188, "learning_rate": 3.993228873608724e-06, "loss": 97.8957, "step": 75250 }, { "epoch": 0.6225751747528643, "grad_norm": 1124.5748291015625, "learning_rate": 3.991847433051427e-06, "loss": 90.5661, "step": 75260 }, { "epoch": 0.6226578980022335, "grad_norm": 701.1575927734375, "learning_rate": 3.990466072708204e-06, "loss": 93.4155, "step": 75270 }, { "epoch": 0.6227406212516028, "grad_norm": 1118.724365234375, "learning_rate": 3.989084792688962e-06, "loss": 114.3592, "step": 75280 }, { "epoch": 0.622823344500972, "grad_norm": 1080.4481201171875, "learning_rate": 3.987703593103604e-06, "loss": 91.7185, "step": 75290 }, { "epoch": 0.6229060677503412, "grad_norm": 1988.3421630859375, "learning_rate": 3.986322474062025e-06, "loss": 110.2386, "step": 75300 }, { "epoch": 0.6229887909997105, "grad_norm": 434.8983459472656, "learning_rate": 3.9849414356741165e-06, "loss": 66.5312, "step": 75310 }, { "epoch": 0.6230715142490797, "grad_norm": 682.07421875, "learning_rate": 3.9835604780497575e-06, "loss": 85.5432, "step": 75320 }, { "epoch": 0.6231542374984489, "grad_norm": 1016.9640502929688, "learning_rate": 3.982179601298827e-06, "loss": 109.5171, "step": 75330 }, { "epoch": 0.6232369607478182, "grad_norm": 893.9984130859375, "learning_rate": 3.9807988055311946e-06, "loss": 78.253, "step": 75340 }, { "epoch": 0.6233196839971874, "grad_norm": 589.87451171875, "learning_rate": 3.979418090856723e-06, "loss": 87.9922, "step": 75350 }, { "epoch": 0.6234024072465566, "grad_norm": 899.80126953125, "learning_rate": 3.978037457385268e-06, "loss": 109.1129, "step": 75360 }, { "epoch": 0.6234851304959259, "grad_norm": 833.7298583984375, "learning_rate": 3.976656905226686e-06, "loss": 104.8421, "step": 75370 }, { "epoch": 0.6235678537452951, "grad_norm": 1044.365234375, "learning_rate": 3.9752764344908155e-06, "loss": 97.9259, "step": 75380 }, { "epoch": 0.6236505769946643, "grad_norm": 1515.5921630859375, "learning_rate": 3.9738960452874975e-06, "loss": 102.9154, "step": 75390 }, { "epoch": 0.6237333002440336, "grad_norm": 1511.9940185546875, "learning_rate": 3.972515737726563e-06, "loss": 114.3788, "step": 75400 }, { "epoch": 0.6238160234934028, "grad_norm": 1036.8448486328125, "learning_rate": 3.9711355119178345e-06, "loss": 144.706, "step": 75410 }, { "epoch": 0.623898746742772, "grad_norm": 655.7908325195312, "learning_rate": 3.969755367971131e-06, "loss": 90.5163, "step": 75420 }, { "epoch": 0.6239814699921413, "grad_norm": 574.1646728515625, "learning_rate": 3.9683753059962646e-06, "loss": 77.3598, "step": 75430 }, { "epoch": 0.6240641932415105, "grad_norm": 615.3191528320312, "learning_rate": 3.966995326103041e-06, "loss": 88.5786, "step": 75440 }, { "epoch": 0.6241469164908797, "grad_norm": 1213.435546875, "learning_rate": 3.965615428401257e-06, "loss": 95.156, "step": 75450 }, { "epoch": 0.624229639740249, "grad_norm": 960.2600708007812, "learning_rate": 3.964235613000708e-06, "loss": 85.2145, "step": 75460 }, { "epoch": 0.6243123629896182, "grad_norm": 616.06689453125, "learning_rate": 3.962855880011177e-06, "loss": 78.2717, "step": 75470 }, { "epoch": 0.6243950862389874, "grad_norm": 560.2363891601562, "learning_rate": 3.961476229542446e-06, "loss": 102.936, "step": 75480 }, { "epoch": 0.6244778094883567, "grad_norm": 473.06793212890625, "learning_rate": 3.9600966617042825e-06, "loss": 72.4044, "step": 75490 }, { "epoch": 0.6245605327377259, "grad_norm": 770.420166015625, "learning_rate": 3.958717176606456e-06, "loss": 99.9721, "step": 75500 }, { "epoch": 0.6246432559870951, "grad_norm": 1100.4041748046875, "learning_rate": 3.957337774358725e-06, "loss": 123.5163, "step": 75510 }, { "epoch": 0.6247259792364644, "grad_norm": 1014.705322265625, "learning_rate": 3.955958455070842e-06, "loss": 105.1985, "step": 75520 }, { "epoch": 0.6248087024858336, "grad_norm": 1663.8048095703125, "learning_rate": 3.954579218852553e-06, "loss": 100.1772, "step": 75530 }, { "epoch": 0.6248914257352028, "grad_norm": 701.8104248046875, "learning_rate": 3.953200065813597e-06, "loss": 100.0598, "step": 75540 }, { "epoch": 0.6249741489845722, "grad_norm": 906.2036743164062, "learning_rate": 3.951820996063708e-06, "loss": 95.372, "step": 75550 }, { "epoch": 0.6250568722339414, "grad_norm": 719.7368774414062, "learning_rate": 3.950442009712612e-06, "loss": 83.6697, "step": 75560 }, { "epoch": 0.6251395954833106, "grad_norm": 703.7513427734375, "learning_rate": 3.949063106870031e-06, "loss": 85.2299, "step": 75570 }, { "epoch": 0.6252223187326799, "grad_norm": 571.2645263671875, "learning_rate": 3.9476842876456735e-06, "loss": 100.489, "step": 75580 }, { "epoch": 0.6253050419820491, "grad_norm": 829.6326293945312, "learning_rate": 3.946305552149247e-06, "loss": 92.2218, "step": 75590 }, { "epoch": 0.6253877652314183, "grad_norm": 716.100341796875, "learning_rate": 3.944926900490452e-06, "loss": 96.999, "step": 75600 }, { "epoch": 0.6254704884807876, "grad_norm": 731.5742797851562, "learning_rate": 3.943548332778982e-06, "loss": 108.1109, "step": 75610 }, { "epoch": 0.6255532117301568, "grad_norm": 586.7816162109375, "learning_rate": 3.942169849124523e-06, "loss": 102.1984, "step": 75620 }, { "epoch": 0.625635934979526, "grad_norm": 583.6131591796875, "learning_rate": 3.940791449636753e-06, "loss": 72.8051, "step": 75630 }, { "epoch": 0.6257186582288953, "grad_norm": 655.7570190429688, "learning_rate": 3.939413134425347e-06, "loss": 92.8459, "step": 75640 }, { "epoch": 0.6258013814782645, "grad_norm": 609.809326171875, "learning_rate": 3.938034903599972e-06, "loss": 67.6845, "step": 75650 }, { "epoch": 0.6258841047276337, "grad_norm": 989.1069946289062, "learning_rate": 3.9366567572702845e-06, "loss": 65.7915, "step": 75660 }, { "epoch": 0.625966827977003, "grad_norm": 746.6648559570312, "learning_rate": 3.935278695545939e-06, "loss": 90.3173, "step": 75670 }, { "epoch": 0.6260495512263722, "grad_norm": 917.3569946289062, "learning_rate": 3.933900718536579e-06, "loss": 103.0491, "step": 75680 }, { "epoch": 0.6261322744757414, "grad_norm": 753.1608276367188, "learning_rate": 3.932522826351849e-06, "loss": 111.5943, "step": 75690 }, { "epoch": 0.6262149977251107, "grad_norm": 582.921142578125, "learning_rate": 3.9311450191013774e-06, "loss": 113.5137, "step": 75700 }, { "epoch": 0.6262977209744799, "grad_norm": 944.9461669921875, "learning_rate": 3.929767296894792e-06, "loss": 77.7904, "step": 75710 }, { "epoch": 0.6263804442238491, "grad_norm": 705.4637451171875, "learning_rate": 3.9283896598417104e-06, "loss": 79.6284, "step": 75720 }, { "epoch": 0.6264631674732184, "grad_norm": 835.1659545898438, "learning_rate": 3.927012108051746e-06, "loss": 89.4407, "step": 75730 }, { "epoch": 0.6265458907225876, "grad_norm": 1340.5850830078125, "learning_rate": 3.925634641634505e-06, "loss": 90.6847, "step": 75740 }, { "epoch": 0.6266286139719568, "grad_norm": 1067.461181640625, "learning_rate": 3.924257260699583e-06, "loss": 90.2306, "step": 75750 }, { "epoch": 0.6267113372213261, "grad_norm": 967.4222412109375, "learning_rate": 3.922879965356574e-06, "loss": 81.9187, "step": 75760 }, { "epoch": 0.6267940604706953, "grad_norm": 1008.221923828125, "learning_rate": 3.921502755715064e-06, "loss": 83.478, "step": 75770 }, { "epoch": 0.6268767837200645, "grad_norm": 506.6917724609375, "learning_rate": 3.920125631884628e-06, "loss": 87.9039, "step": 75780 }, { "epoch": 0.6269595069694338, "grad_norm": 881.6430053710938, "learning_rate": 3.918748593974841e-06, "loss": 84.5617, "step": 75790 }, { "epoch": 0.627042230218803, "grad_norm": 1556.2261962890625, "learning_rate": 3.917371642095265e-06, "loss": 108.6617, "step": 75800 }, { "epoch": 0.6271249534681722, "grad_norm": 1210.6939697265625, "learning_rate": 3.91599477635546e-06, "loss": 89.0075, "step": 75810 }, { "epoch": 0.6272076767175415, "grad_norm": 792.7057495117188, "learning_rate": 3.914617996864976e-06, "loss": 75.4186, "step": 75820 }, { "epoch": 0.6272903999669107, "grad_norm": 965.24072265625, "learning_rate": 3.9132413037333565e-06, "loss": 85.8507, "step": 75830 }, { "epoch": 0.6273731232162799, "grad_norm": 773.074462890625, "learning_rate": 3.911864697070139e-06, "loss": 97.3403, "step": 75840 }, { "epoch": 0.6274558464656492, "grad_norm": 1299.928955078125, "learning_rate": 3.910488176984853e-06, "loss": 127.7312, "step": 75850 }, { "epoch": 0.6275385697150184, "grad_norm": 662.94970703125, "learning_rate": 3.909111743587023e-06, "loss": 90.648, "step": 75860 }, { "epoch": 0.6276212929643876, "grad_norm": 557.4157104492188, "learning_rate": 3.907735396986166e-06, "loss": 110.0861, "step": 75870 }, { "epoch": 0.6277040162137568, "grad_norm": 1387.1092529296875, "learning_rate": 3.9063591372917875e-06, "loss": 83.7509, "step": 75880 }, { "epoch": 0.6277867394631261, "grad_norm": 1644.345947265625, "learning_rate": 3.904982964613395e-06, "loss": 104.4969, "step": 75890 }, { "epoch": 0.6278694627124953, "grad_norm": 663.9248657226562, "learning_rate": 3.903606879060483e-06, "loss": 120.2953, "step": 75900 }, { "epoch": 0.6279521859618645, "grad_norm": 1548.13232421875, "learning_rate": 3.902230880742541e-06, "loss": 109.4352, "step": 75910 }, { "epoch": 0.6280349092112338, "grad_norm": 615.7534790039062, "learning_rate": 3.900854969769049e-06, "loss": 82.1672, "step": 75920 }, { "epoch": 0.628117632460603, "grad_norm": 1001.1287841796875, "learning_rate": 3.899479146249482e-06, "loss": 87.0741, "step": 75930 }, { "epoch": 0.6282003557099722, "grad_norm": 1314.9921875, "learning_rate": 3.898103410293309e-06, "loss": 136.3624, "step": 75940 }, { "epoch": 0.6282830789593415, "grad_norm": 1015.78759765625, "learning_rate": 3.89672776200999e-06, "loss": 87.5703, "step": 75950 }, { "epoch": 0.6283658022087107, "grad_norm": 553.9141235351562, "learning_rate": 3.895352201508981e-06, "loss": 66.2459, "step": 75960 }, { "epoch": 0.6284485254580799, "grad_norm": 585.1502075195312, "learning_rate": 3.893976728899726e-06, "loss": 119.4419, "step": 75970 }, { "epoch": 0.6285312487074493, "grad_norm": 414.6874084472656, "learning_rate": 3.892601344291667e-06, "loss": 68.8029, "step": 75980 }, { "epoch": 0.6286139719568185, "grad_norm": 912.771728515625, "learning_rate": 3.891226047794237e-06, "loss": 89.6607, "step": 75990 }, { "epoch": 0.6286966952061877, "grad_norm": 777.063232421875, "learning_rate": 3.8898508395168645e-06, "loss": 146.2245, "step": 76000 }, { "epoch": 0.628779418455557, "grad_norm": 884.0517578125, "learning_rate": 3.888475719568961e-06, "loss": 87.4548, "step": 76010 }, { "epoch": 0.6288621417049262, "grad_norm": 768.80859375, "learning_rate": 3.887100688059947e-06, "loss": 83.4292, "step": 76020 }, { "epoch": 0.6289448649542954, "grad_norm": 686.2239379882812, "learning_rate": 3.885725745099222e-06, "loss": 100.6639, "step": 76030 }, { "epoch": 0.6290275882036647, "grad_norm": 1023.0579833984375, "learning_rate": 3.8843508907961855e-06, "loss": 81.8008, "step": 76040 }, { "epoch": 0.6291103114530339, "grad_norm": 867.1831665039062, "learning_rate": 3.882976125260229e-06, "loss": 89.1627, "step": 76050 }, { "epoch": 0.6291930347024031, "grad_norm": 1243.499755859375, "learning_rate": 3.881601448600736e-06, "loss": 112.3818, "step": 76060 }, { "epoch": 0.6292757579517724, "grad_norm": 753.3609619140625, "learning_rate": 3.880226860927082e-06, "loss": 79.829, "step": 76070 }, { "epoch": 0.6293584812011416, "grad_norm": 457.8770446777344, "learning_rate": 3.8788523623486405e-06, "loss": 115.6456, "step": 76080 }, { "epoch": 0.6294412044505108, "grad_norm": 2279.8681640625, "learning_rate": 3.877477952974768e-06, "loss": 100.5304, "step": 76090 }, { "epoch": 0.6295239276998801, "grad_norm": 1045.8399658203125, "learning_rate": 3.876103632914825e-06, "loss": 84.072, "step": 76100 }, { "epoch": 0.6296066509492493, "grad_norm": 731.2930908203125, "learning_rate": 3.8747294022781555e-06, "loss": 91.0355, "step": 76110 }, { "epoch": 0.6296893741986185, "grad_norm": 1290.4974365234375, "learning_rate": 3.873355261174105e-06, "loss": 74.0479, "step": 76120 }, { "epoch": 0.6297720974479878, "grad_norm": 842.3202514648438, "learning_rate": 3.871981209712006e-06, "loss": 91.0174, "step": 76130 }, { "epoch": 0.629854820697357, "grad_norm": 1351.25244140625, "learning_rate": 3.870607248001184e-06, "loss": 96.3427, "step": 76140 }, { "epoch": 0.6299375439467262, "grad_norm": 1196.4425048828125, "learning_rate": 3.869233376150961e-06, "loss": 83.2907, "step": 76150 }, { "epoch": 0.6300202671960955, "grad_norm": 630.231689453125, "learning_rate": 3.867859594270649e-06, "loss": 93.8257, "step": 76160 }, { "epoch": 0.6301029904454647, "grad_norm": 636.0516357421875, "learning_rate": 3.866485902469554e-06, "loss": 107.9014, "step": 76170 }, { "epoch": 0.6301857136948339, "grad_norm": 867.7330322265625, "learning_rate": 3.865112300856972e-06, "loss": 98.2939, "step": 76180 }, { "epoch": 0.6302684369442032, "grad_norm": 836.81884765625, "learning_rate": 3.863738789542196e-06, "loss": 91.0884, "step": 76190 }, { "epoch": 0.6303511601935724, "grad_norm": 700.2529296875, "learning_rate": 3.86236536863451e-06, "loss": 95.8659, "step": 76200 }, { "epoch": 0.6304338834429416, "grad_norm": 716.2244262695312, "learning_rate": 3.860992038243189e-06, "loss": 61.6874, "step": 76210 }, { "epoch": 0.6305166066923109, "grad_norm": 410.9248962402344, "learning_rate": 3.8596187984775064e-06, "loss": 99.224, "step": 76220 }, { "epoch": 0.6305993299416801, "grad_norm": 907.3155517578125, "learning_rate": 3.8582456494467214e-06, "loss": 101.9425, "step": 76230 }, { "epoch": 0.6306820531910493, "grad_norm": 579.9906616210938, "learning_rate": 3.8568725912600904e-06, "loss": 71.3855, "step": 76240 }, { "epoch": 0.6307647764404186, "grad_norm": 1267.6279296875, "learning_rate": 3.855499624026861e-06, "loss": 123.397, "step": 76250 }, { "epoch": 0.6308474996897878, "grad_norm": 720.7117309570312, "learning_rate": 3.854126747856275e-06, "loss": 105.1762, "step": 76260 }, { "epoch": 0.630930222939157, "grad_norm": 1166.6748046875, "learning_rate": 3.8527539628575635e-06, "loss": 78.2158, "step": 76270 }, { "epoch": 0.6310129461885263, "grad_norm": 721.6836547851562, "learning_rate": 3.851381269139955e-06, "loss": 75.0298, "step": 76280 }, { "epoch": 0.6310956694378955, "grad_norm": 932.6498413085938, "learning_rate": 3.8500086668126666e-06, "loss": 127.4507, "step": 76290 }, { "epoch": 0.6311783926872647, "grad_norm": 804.4441528320312, "learning_rate": 3.848636155984912e-06, "loss": 88.2771, "step": 76300 }, { "epoch": 0.631261115936634, "grad_norm": 662.4979248046875, "learning_rate": 3.847263736765892e-06, "loss": 76.6129, "step": 76310 }, { "epoch": 0.6313438391860032, "grad_norm": 1362.8896484375, "learning_rate": 3.8458914092648074e-06, "loss": 116.3484, "step": 76320 }, { "epoch": 0.6314265624353724, "grad_norm": 1995.614013671875, "learning_rate": 3.844519173590847e-06, "loss": 85.0176, "step": 76330 }, { "epoch": 0.6315092856847417, "grad_norm": 494.42132568359375, "learning_rate": 3.843147029853194e-06, "loss": 77.5021, "step": 76340 }, { "epoch": 0.6315920089341109, "grad_norm": 1092.7735595703125, "learning_rate": 3.841774978161022e-06, "loss": 107.8559, "step": 76350 }, { "epoch": 0.6316747321834801, "grad_norm": 746.4695434570312, "learning_rate": 3.840403018623499e-06, "loss": 82.9424, "step": 76360 }, { "epoch": 0.6317574554328494, "grad_norm": 911.3170166015625, "learning_rate": 3.839031151349786e-06, "loss": 87.2492, "step": 76370 }, { "epoch": 0.6318401786822186, "grad_norm": 1411.3955078125, "learning_rate": 3.837659376449036e-06, "loss": 103.6988, "step": 76380 }, { "epoch": 0.6319229019315878, "grad_norm": 1356.0250244140625, "learning_rate": 3.836287694030395e-06, "loss": 120.5798, "step": 76390 }, { "epoch": 0.6320056251809572, "grad_norm": 498.0589294433594, "learning_rate": 3.834916104203e-06, "loss": 95.7689, "step": 76400 }, { "epoch": 0.6320883484303264, "grad_norm": 701.58740234375, "learning_rate": 3.833544607075986e-06, "loss": 78.422, "step": 76410 }, { "epoch": 0.6321710716796956, "grad_norm": 596.4544677734375, "learning_rate": 3.8321732027584734e-06, "loss": 94.8889, "step": 76420 }, { "epoch": 0.6322537949290649, "grad_norm": 910.3405151367188, "learning_rate": 3.830801891359582e-06, "loss": 81.3986, "step": 76430 }, { "epoch": 0.6323365181784341, "grad_norm": 494.3802490234375, "learning_rate": 3.829430672988414e-06, "loss": 75.0982, "step": 76440 }, { "epoch": 0.6324192414278033, "grad_norm": 773.7373046875, "learning_rate": 3.828059547754078e-06, "loss": 73.4452, "step": 76450 }, { "epoch": 0.6325019646771726, "grad_norm": 597.4763793945312, "learning_rate": 3.826688515765664e-06, "loss": 94.815, "step": 76460 }, { "epoch": 0.6325846879265418, "grad_norm": 1558.9375, "learning_rate": 3.82531757713226e-06, "loss": 89.0715, "step": 76470 }, { "epoch": 0.632667411175911, "grad_norm": 912.0201416015625, "learning_rate": 3.823946731962945e-06, "loss": 89.6122, "step": 76480 }, { "epoch": 0.6327501344252803, "grad_norm": 803.7633666992188, "learning_rate": 3.8225759803667925e-06, "loss": 90.7924, "step": 76490 }, { "epoch": 0.6328328576746495, "grad_norm": 1232.05712890625, "learning_rate": 3.821205322452863e-06, "loss": 89.2706, "step": 76500 }, { "epoch": 0.6329155809240187, "grad_norm": 831.2234497070312, "learning_rate": 3.81983475833022e-06, "loss": 75.2085, "step": 76510 }, { "epoch": 0.632998304173388, "grad_norm": 560.6333618164062, "learning_rate": 3.818464288107908e-06, "loss": 80.2901, "step": 76520 }, { "epoch": 0.6330810274227572, "grad_norm": 1093.8529052734375, "learning_rate": 3.817093911894968e-06, "loss": 81.6339, "step": 76530 }, { "epoch": 0.6331637506721264, "grad_norm": 787.9254150390625, "learning_rate": 3.8157236298004375e-06, "loss": 98.8886, "step": 76540 }, { "epoch": 0.6332464739214957, "grad_norm": 613.9071655273438, "learning_rate": 3.814353441933343e-06, "loss": 88.6119, "step": 76550 }, { "epoch": 0.6333291971708649, "grad_norm": 1168.52685546875, "learning_rate": 3.812983348402703e-06, "loss": 86.0251, "step": 76560 }, { "epoch": 0.6334119204202341, "grad_norm": 684.206298828125, "learning_rate": 3.811613349317531e-06, "loss": 68.7577, "step": 76570 }, { "epoch": 0.6334946436696034, "grad_norm": 543.0419311523438, "learning_rate": 3.810243444786831e-06, "loss": 85.61, "step": 76580 }, { "epoch": 0.6335773669189726, "grad_norm": 1784.14794921875, "learning_rate": 3.8088736349195995e-06, "loss": 119.1676, "step": 76590 }, { "epoch": 0.6336600901683418, "grad_norm": 1234.8670654296875, "learning_rate": 3.8075039198248274e-06, "loss": 82.1384, "step": 76600 }, { "epoch": 0.633742813417711, "grad_norm": 454.25372314453125, "learning_rate": 3.8061342996114946e-06, "loss": 81.6356, "step": 76610 }, { "epoch": 0.6338255366670803, "grad_norm": 1700.140380859375, "learning_rate": 3.8047647743885762e-06, "loss": 88.7648, "step": 76620 }, { "epoch": 0.6339082599164495, "grad_norm": 1148.6446533203125, "learning_rate": 3.8033953442650382e-06, "loss": 85.314, "step": 76630 }, { "epoch": 0.6339909831658187, "grad_norm": 730.97998046875, "learning_rate": 3.802026009349843e-06, "loss": 98.301, "step": 76640 }, { "epoch": 0.634073706415188, "grad_norm": 672.2242431640625, "learning_rate": 3.800656769751939e-06, "loss": 86.9433, "step": 76650 }, { "epoch": 0.6341564296645572, "grad_norm": 769.8524780273438, "learning_rate": 3.799287625580273e-06, "loss": 93.8155, "step": 76660 }, { "epoch": 0.6342391529139264, "grad_norm": 539.6921997070312, "learning_rate": 3.7979185769437795e-06, "loss": 71.8179, "step": 76670 }, { "epoch": 0.6343218761632957, "grad_norm": 1174.0040283203125, "learning_rate": 3.7965496239513875e-06, "loss": 100.7191, "step": 76680 }, { "epoch": 0.6344045994126649, "grad_norm": 654.21240234375, "learning_rate": 3.79518076671202e-06, "loss": 101.6481, "step": 76690 }, { "epoch": 0.6344873226620341, "grad_norm": 1032.4493408203125, "learning_rate": 3.793812005334589e-06, "loss": 73.4023, "step": 76700 }, { "epoch": 0.6345700459114034, "grad_norm": 1226.67333984375, "learning_rate": 3.792443339928001e-06, "loss": 94.1326, "step": 76710 }, { "epoch": 0.6346527691607726, "grad_norm": 693.3799438476562, "learning_rate": 3.7910747706011543e-06, "loss": 103.0554, "step": 76720 }, { "epoch": 0.6347354924101418, "grad_norm": 427.7120361328125, "learning_rate": 3.7897062974629384e-06, "loss": 101.5577, "step": 76730 }, { "epoch": 0.6348182156595111, "grad_norm": 1844.3143310546875, "learning_rate": 3.78833792062224e-06, "loss": 119.7666, "step": 76740 }, { "epoch": 0.6349009389088803, "grad_norm": 381.83380126953125, "learning_rate": 3.786969640187932e-06, "loss": 89.6403, "step": 76750 }, { "epoch": 0.6349836621582495, "grad_norm": 1052.9390869140625, "learning_rate": 3.785601456268882e-06, "loss": 81.7052, "step": 76760 }, { "epoch": 0.6350663854076188, "grad_norm": 1047.6627197265625, "learning_rate": 3.7842333689739524e-06, "loss": 87.8519, "step": 76770 }, { "epoch": 0.635149108656988, "grad_norm": 623.3661499023438, "learning_rate": 3.782865378411993e-06, "loss": 104.4676, "step": 76780 }, { "epoch": 0.6352318319063572, "grad_norm": 482.55059814453125, "learning_rate": 3.7814974846918496e-06, "loss": 87.8084, "step": 76790 }, { "epoch": 0.6353145551557265, "grad_norm": 731.9932250976562, "learning_rate": 3.7801296879223594e-06, "loss": 64.0985, "step": 76800 }, { "epoch": 0.6353972784050957, "grad_norm": 421.17864990234375, "learning_rate": 3.7787619882123506e-06, "loss": 95.0226, "step": 76810 }, { "epoch": 0.6354800016544649, "grad_norm": 945.0263671875, "learning_rate": 3.7773943856706463e-06, "loss": 96.3978, "step": 76820 }, { "epoch": 0.6355627249038343, "grad_norm": 705.6464233398438, "learning_rate": 3.7760268804060583e-06, "loss": 92.4526, "step": 76830 }, { "epoch": 0.6356454481532035, "grad_norm": 624.5090942382812, "learning_rate": 3.774659472527396e-06, "loss": 82.1249, "step": 76840 }, { "epoch": 0.6357281714025727, "grad_norm": 788.3277587890625, "learning_rate": 3.7732921621434553e-06, "loss": 66.9649, "step": 76850 }, { "epoch": 0.635810894651942, "grad_norm": 451.6109619140625, "learning_rate": 3.77192494936303e-06, "loss": 113.1434, "step": 76860 }, { "epoch": 0.6358936179013112, "grad_norm": 711.1947021484375, "learning_rate": 3.7705578342948967e-06, "loss": 81.1459, "step": 76870 }, { "epoch": 0.6359763411506804, "grad_norm": 987.3251953125, "learning_rate": 3.7691908170478352e-06, "loss": 82.2781, "step": 76880 }, { "epoch": 0.6360590644000497, "grad_norm": 791.8734130859375, "learning_rate": 3.767823897730612e-06, "loss": 87.6097, "step": 76890 }, { "epoch": 0.6361417876494189, "grad_norm": 657.6787109375, "learning_rate": 3.7664570764519865e-06, "loss": 88.5876, "step": 76900 }, { "epoch": 0.6362245108987881, "grad_norm": 958.947265625, "learning_rate": 3.76509035332071e-06, "loss": 94.1349, "step": 76910 }, { "epoch": 0.6363072341481574, "grad_norm": 1417.035400390625, "learning_rate": 3.7637237284455264e-06, "loss": 97.1612, "step": 76920 }, { "epoch": 0.6363899573975266, "grad_norm": 766.021240234375, "learning_rate": 3.762357201935171e-06, "loss": 82.1091, "step": 76930 }, { "epoch": 0.6364726806468958, "grad_norm": 1105.8828125, "learning_rate": 3.7609907738983762e-06, "loss": 88.7781, "step": 76940 }, { "epoch": 0.6365554038962651, "grad_norm": 916.3728637695312, "learning_rate": 3.7596244444438577e-06, "loss": 99.8711, "step": 76950 }, { "epoch": 0.6366381271456343, "grad_norm": 1511.9302978515625, "learning_rate": 3.758258213680328e-06, "loss": 76.2348, "step": 76960 }, { "epoch": 0.6367208503950035, "grad_norm": 966.1973266601562, "learning_rate": 3.7568920817164945e-06, "loss": 102.2466, "step": 76970 }, { "epoch": 0.6368035736443728, "grad_norm": 1336.3597412109375, "learning_rate": 3.755526048661053e-06, "loss": 102.7599, "step": 76980 }, { "epoch": 0.636886296893742, "grad_norm": 845.8065185546875, "learning_rate": 3.7541601146226924e-06, "loss": 92.4951, "step": 76990 }, { "epoch": 0.6369690201431112, "grad_norm": 876.119873046875, "learning_rate": 3.752794279710094e-06, "loss": 90.3612, "step": 77000 }, { "epoch": 0.6370517433924805, "grad_norm": 1019.0983276367188, "learning_rate": 3.751428544031931e-06, "loss": 69.2927, "step": 77010 }, { "epoch": 0.6371344666418497, "grad_norm": 591.1396484375, "learning_rate": 3.750062907696868e-06, "loss": 59.3948, "step": 77020 }, { "epoch": 0.6372171898912189, "grad_norm": 627.9861450195312, "learning_rate": 3.7486973708135643e-06, "loss": 89.2826, "step": 77030 }, { "epoch": 0.6372999131405882, "grad_norm": 823.3641967773438, "learning_rate": 3.7473319334906678e-06, "loss": 90.4643, "step": 77040 }, { "epoch": 0.6373826363899574, "grad_norm": 1226.17333984375, "learning_rate": 3.7459665958368197e-06, "loss": 88.7953, "step": 77050 }, { "epoch": 0.6374653596393266, "grad_norm": 1117.8607177734375, "learning_rate": 3.7446013579606534e-06, "loss": 101.2203, "step": 77060 }, { "epoch": 0.6375480828886959, "grad_norm": 1034.3206787109375, "learning_rate": 3.743236219970796e-06, "loss": 102.8942, "step": 77070 }, { "epoch": 0.6376308061380651, "grad_norm": 929.0318603515625, "learning_rate": 3.741871181975866e-06, "loss": 102.5503, "step": 77080 }, { "epoch": 0.6377135293874343, "grad_norm": 815.303466796875, "learning_rate": 3.740506244084471e-06, "loss": 88.2377, "step": 77090 }, { "epoch": 0.6377962526368036, "grad_norm": 1119.909423828125, "learning_rate": 3.7391414064052138e-06, "loss": 85.4328, "step": 77100 }, { "epoch": 0.6378789758861728, "grad_norm": 477.2499084472656, "learning_rate": 3.737776669046689e-06, "loss": 85.3801, "step": 77110 }, { "epoch": 0.637961699135542, "grad_norm": 1207.775634765625, "learning_rate": 3.7364120321174826e-06, "loss": 105.7464, "step": 77120 }, { "epoch": 0.6380444223849113, "grad_norm": 664.5465698242188, "learning_rate": 3.7350474957261705e-06, "loss": 119.5242, "step": 77130 }, { "epoch": 0.6381271456342805, "grad_norm": 686.4176025390625, "learning_rate": 3.7336830599813245e-06, "loss": 96.0158, "step": 77140 }, { "epoch": 0.6382098688836497, "grad_norm": 537.5717163085938, "learning_rate": 3.732318724991505e-06, "loss": 75.7251, "step": 77150 }, { "epoch": 0.638292592133019, "grad_norm": 1141.5447998046875, "learning_rate": 3.730954490865266e-06, "loss": 88.8078, "step": 77160 }, { "epoch": 0.6383753153823882, "grad_norm": 990.2410888671875, "learning_rate": 3.7295903577111548e-06, "loss": 86.1368, "step": 77170 }, { "epoch": 0.6384580386317574, "grad_norm": 613.973876953125, "learning_rate": 3.728226325637709e-06, "loss": 57.9084, "step": 77180 }, { "epoch": 0.6385407618811267, "grad_norm": 959.3004150390625, "learning_rate": 3.726862394753457e-06, "loss": 88.2764, "step": 77190 }, { "epoch": 0.6386234851304959, "grad_norm": 1009.2028198242188, "learning_rate": 3.725498565166923e-06, "loss": 60.0482, "step": 77200 }, { "epoch": 0.6387062083798651, "grad_norm": 731.6983032226562, "learning_rate": 3.7241348369866183e-06, "loss": 90.616, "step": 77210 }, { "epoch": 0.6387889316292344, "grad_norm": 1137.9534912109375, "learning_rate": 3.7227712103210485e-06, "loss": 77.4084, "step": 77220 }, { "epoch": 0.6388716548786036, "grad_norm": 740.2508544921875, "learning_rate": 3.721407685278712e-06, "loss": 99.5695, "step": 77230 }, { "epoch": 0.6389543781279728, "grad_norm": 921.7864379882812, "learning_rate": 3.7200442619680976e-06, "loss": 62.2452, "step": 77240 }, { "epoch": 0.6390371013773422, "grad_norm": 1076.6083984375, "learning_rate": 3.7186809404976877e-06, "loss": 95.747, "step": 77250 }, { "epoch": 0.6391198246267114, "grad_norm": 971.0541381835938, "learning_rate": 3.7173177209759538e-06, "loss": 99.368, "step": 77260 }, { "epoch": 0.6392025478760806, "grad_norm": 1410.000732421875, "learning_rate": 3.715954603511363e-06, "loss": 106.5322, "step": 77270 }, { "epoch": 0.6392852711254499, "grad_norm": 587.7864990234375, "learning_rate": 3.714591588212372e-06, "loss": 74.5204, "step": 77280 }, { "epoch": 0.6393679943748191, "grad_norm": 632.3125, "learning_rate": 3.713228675187429e-06, "loss": 105.5088, "step": 77290 }, { "epoch": 0.6394507176241883, "grad_norm": 768.724609375, "learning_rate": 3.7118658645449745e-06, "loss": 87.9743, "step": 77300 }, { "epoch": 0.6395334408735576, "grad_norm": 721.2608642578125, "learning_rate": 3.710503156393441e-06, "loss": 79.1286, "step": 77310 }, { "epoch": 0.6396161641229268, "grad_norm": 1327.6044921875, "learning_rate": 3.7091405508412538e-06, "loss": 95.4713, "step": 77320 }, { "epoch": 0.639698887372296, "grad_norm": 1236.894287109375, "learning_rate": 3.7077780479968286e-06, "loss": 91.4254, "step": 77330 }, { "epoch": 0.6397816106216652, "grad_norm": 883.6204833984375, "learning_rate": 3.7064156479685736e-06, "loss": 119.5111, "step": 77340 }, { "epoch": 0.6398643338710345, "grad_norm": 589.4072265625, "learning_rate": 3.705053350864888e-06, "loss": 71.212, "step": 77350 }, { "epoch": 0.6399470571204037, "grad_norm": 1030.01904296875, "learning_rate": 3.703691156794165e-06, "loss": 68.7815, "step": 77360 }, { "epoch": 0.6400297803697729, "grad_norm": 1243.1724853515625, "learning_rate": 3.7023290658647893e-06, "loss": 81.1806, "step": 77370 }, { "epoch": 0.6401125036191422, "grad_norm": 1076.9923095703125, "learning_rate": 3.7009670781851326e-06, "loss": 92.6175, "step": 77380 }, { "epoch": 0.6401952268685114, "grad_norm": 811.0065307617188, "learning_rate": 3.6996051938635626e-06, "loss": 94.767, "step": 77390 }, { "epoch": 0.6402779501178806, "grad_norm": 707.3673095703125, "learning_rate": 3.69824341300844e-06, "loss": 82.464, "step": 77400 }, { "epoch": 0.6403606733672499, "grad_norm": 801.9874267578125, "learning_rate": 3.696881735728115e-06, "loss": 89.9301, "step": 77410 }, { "epoch": 0.6404433966166191, "grad_norm": 782.5747680664062, "learning_rate": 3.6955201621309302e-06, "loss": 91.4034, "step": 77420 }, { "epoch": 0.6405261198659883, "grad_norm": 1794.2557373046875, "learning_rate": 3.6941586923252194e-06, "loss": 89.6232, "step": 77430 }, { "epoch": 0.6406088431153576, "grad_norm": 1282.8133544921875, "learning_rate": 3.6927973264193074e-06, "loss": 84.1603, "step": 77440 }, { "epoch": 0.6406915663647268, "grad_norm": 615.4580078125, "learning_rate": 3.691436064521513e-06, "loss": 106.5725, "step": 77450 }, { "epoch": 0.640774289614096, "grad_norm": 885.3602905273438, "learning_rate": 3.6900749067401474e-06, "loss": 100.404, "step": 77460 }, { "epoch": 0.6408570128634653, "grad_norm": 883.349853515625, "learning_rate": 3.6887138531835085e-06, "loss": 120.5579, "step": 77470 }, { "epoch": 0.6409397361128345, "grad_norm": 510.637939453125, "learning_rate": 3.6873529039598903e-06, "loss": 72.0067, "step": 77480 }, { "epoch": 0.6410224593622037, "grad_norm": 1126.9063720703125, "learning_rate": 3.6859920591775763e-06, "loss": 89.0631, "step": 77490 }, { "epoch": 0.641105182611573, "grad_norm": 897.1529541015625, "learning_rate": 3.6846313189448447e-06, "loss": 95.6343, "step": 77500 }, { "epoch": 0.6411879058609422, "grad_norm": 1228.466796875, "learning_rate": 3.6832706833699616e-06, "loss": 107.3611, "step": 77510 }, { "epoch": 0.6412706291103114, "grad_norm": 386.9453125, "learning_rate": 3.681910152561188e-06, "loss": 56.1387, "step": 77520 }, { "epoch": 0.6413533523596807, "grad_norm": 651.796142578125, "learning_rate": 3.6805497266267742e-06, "loss": 80.0673, "step": 77530 }, { "epoch": 0.6414360756090499, "grad_norm": 970.6996459960938, "learning_rate": 3.679189405674963e-06, "loss": 86.8163, "step": 77540 }, { "epoch": 0.6415187988584191, "grad_norm": 921.419189453125, "learning_rate": 3.6778291898139907e-06, "loss": 99.753, "step": 77550 }, { "epoch": 0.6416015221077884, "grad_norm": 940.6666259765625, "learning_rate": 3.6764690791520797e-06, "loss": 72.5782, "step": 77560 }, { "epoch": 0.6416842453571576, "grad_norm": 927.505126953125, "learning_rate": 3.6751090737974506e-06, "loss": 80.6144, "step": 77570 }, { "epoch": 0.6417669686065268, "grad_norm": 1062.0577392578125, "learning_rate": 3.673749173858312e-06, "loss": 80.1158, "step": 77580 }, { "epoch": 0.6418496918558961, "grad_norm": 1165.107177734375, "learning_rate": 3.672389379442864e-06, "loss": 109.7003, "step": 77590 }, { "epoch": 0.6419324151052653, "grad_norm": 929.5150756835938, "learning_rate": 3.6710296906593012e-06, "loss": 76.4433, "step": 77600 }, { "epoch": 0.6420151383546345, "grad_norm": 709.593017578125, "learning_rate": 3.6696701076158064e-06, "loss": 78.3428, "step": 77610 }, { "epoch": 0.6420978616040038, "grad_norm": 995.3925170898438, "learning_rate": 3.6683106304205564e-06, "loss": 92.2917, "step": 77620 }, { "epoch": 0.642180584853373, "grad_norm": 426.57952880859375, "learning_rate": 3.666951259181718e-06, "loss": 80.3045, "step": 77630 }, { "epoch": 0.6422633081027422, "grad_norm": 1803.5283203125, "learning_rate": 3.6655919940074497e-06, "loss": 85.4238, "step": 77640 }, { "epoch": 0.6423460313521115, "grad_norm": 863.35302734375, "learning_rate": 3.664232835005902e-06, "loss": 112.9134, "step": 77650 }, { "epoch": 0.6424287546014807, "grad_norm": 444.5658264160156, "learning_rate": 3.6628737822852177e-06, "loss": 73.9999, "step": 77660 }, { "epoch": 0.6425114778508499, "grad_norm": 895.299560546875, "learning_rate": 3.66151483595353e-06, "loss": 98.808, "step": 77670 }, { "epoch": 0.6425942011002193, "grad_norm": 622.2603149414062, "learning_rate": 3.6601559961189626e-06, "loss": 72.3235, "step": 77680 }, { "epoch": 0.6426769243495885, "grad_norm": 751.9857788085938, "learning_rate": 3.6587972628896345e-06, "loss": 65.3022, "step": 77690 }, { "epoch": 0.6427596475989577, "grad_norm": 696.5802001953125, "learning_rate": 3.6574386363736532e-06, "loss": 111.5915, "step": 77700 }, { "epoch": 0.642842370848327, "grad_norm": 1348.688232421875, "learning_rate": 3.6560801166791183e-06, "loss": 111.6634, "step": 77710 }, { "epoch": 0.6429250940976962, "grad_norm": 933.01953125, "learning_rate": 3.654721703914121e-06, "loss": 101.2537, "step": 77720 }, { "epoch": 0.6430078173470654, "grad_norm": 555.1231689453125, "learning_rate": 3.6533633981867433e-06, "loss": 71.2294, "step": 77730 }, { "epoch": 0.6430905405964347, "grad_norm": 681.1109008789062, "learning_rate": 3.652005199605059e-06, "loss": 114.1313, "step": 77740 }, { "epoch": 0.6431732638458039, "grad_norm": 1005.3911743164062, "learning_rate": 3.6506471082771357e-06, "loss": 98.2529, "step": 77750 }, { "epoch": 0.6432559870951731, "grad_norm": 1053.3704833984375, "learning_rate": 3.6492891243110283e-06, "loss": 72.2338, "step": 77760 }, { "epoch": 0.6433387103445424, "grad_norm": 664.69482421875, "learning_rate": 3.6479312478147866e-06, "loss": 93.1901, "step": 77770 }, { "epoch": 0.6434214335939116, "grad_norm": 517.3734741210938, "learning_rate": 3.64657347889645e-06, "loss": 81.5787, "step": 77780 }, { "epoch": 0.6435041568432808, "grad_norm": 778.450927734375, "learning_rate": 3.6452158176640505e-06, "loss": 111.1286, "step": 77790 }, { "epoch": 0.6435868800926501, "grad_norm": 795.8098754882812, "learning_rate": 3.6438582642256138e-06, "loss": 80.7202, "step": 77800 }, { "epoch": 0.6436696033420193, "grad_norm": 1202.6724853515625, "learning_rate": 3.642500818689148e-06, "loss": 89.1335, "step": 77810 }, { "epoch": 0.6437523265913885, "grad_norm": 1074.335205078125, "learning_rate": 3.641143481162661e-06, "loss": 71.3191, "step": 77820 }, { "epoch": 0.6438350498407578, "grad_norm": 1161.094482421875, "learning_rate": 3.639786251754153e-06, "loss": 85.943, "step": 77830 }, { "epoch": 0.643917773090127, "grad_norm": 3801.1142578125, "learning_rate": 3.638429130571609e-06, "loss": 89.4513, "step": 77840 }, { "epoch": 0.6440004963394962, "grad_norm": 818.4242553710938, "learning_rate": 3.637072117723012e-06, "loss": 108.8363, "step": 77850 }, { "epoch": 0.6440832195888655, "grad_norm": 541.69921875, "learning_rate": 3.6357152133163297e-06, "loss": 83.608, "step": 77860 }, { "epoch": 0.6441659428382347, "grad_norm": 540.3868408203125, "learning_rate": 3.634358417459528e-06, "loss": 109.2081, "step": 77870 }, { "epoch": 0.6442486660876039, "grad_norm": 2112.091796875, "learning_rate": 3.633001730260558e-06, "loss": 145.9334, "step": 77880 }, { "epoch": 0.6443313893369732, "grad_norm": 1037.028076171875, "learning_rate": 3.63164515182737e-06, "loss": 128.6462, "step": 77890 }, { "epoch": 0.6444141125863424, "grad_norm": 710.5433959960938, "learning_rate": 3.630288682267895e-06, "loss": 95.2309, "step": 77900 }, { "epoch": 0.6444968358357116, "grad_norm": 542.4974365234375, "learning_rate": 3.628932321690063e-06, "loss": 89.335, "step": 77910 }, { "epoch": 0.6445795590850809, "grad_norm": 910.8280639648438, "learning_rate": 3.6275760702017938e-06, "loss": 123.32, "step": 77920 }, { "epoch": 0.6446622823344501, "grad_norm": 474.7346496582031, "learning_rate": 3.626219927910999e-06, "loss": 144.9698, "step": 77930 }, { "epoch": 0.6447450055838193, "grad_norm": 729.6742553710938, "learning_rate": 3.6248638949255795e-06, "loss": 97.5584, "step": 77940 }, { "epoch": 0.6448277288331886, "grad_norm": 692.0260009765625, "learning_rate": 3.6235079713534287e-06, "loss": 96.659, "step": 77950 }, { "epoch": 0.6449104520825578, "grad_norm": 1216.1015625, "learning_rate": 3.6221521573024316e-06, "loss": 119.2402, "step": 77960 }, { "epoch": 0.644993175331927, "grad_norm": 1217.374267578125, "learning_rate": 3.620796452880464e-06, "loss": 110.5306, "step": 77970 }, { "epoch": 0.6450758985812963, "grad_norm": 606.9527587890625, "learning_rate": 3.6194408581953934e-06, "loss": 99.9628, "step": 77980 }, { "epoch": 0.6451586218306655, "grad_norm": 649.1746826171875, "learning_rate": 3.618085373355077e-06, "loss": 95.9513, "step": 77990 }, { "epoch": 0.6452413450800347, "grad_norm": 3910.442626953125, "learning_rate": 3.6167299984673655e-06, "loss": 100.3708, "step": 78000 }, { "epoch": 0.645324068329404, "grad_norm": 991.4779663085938, "learning_rate": 3.615374733640099e-06, "loss": 89.7993, "step": 78010 }, { "epoch": 0.6454067915787732, "grad_norm": 814.968017578125, "learning_rate": 3.6140195789811108e-06, "loss": 95.6553, "step": 78020 }, { "epoch": 0.6454895148281424, "grad_norm": 974.3428344726562, "learning_rate": 3.6126645345982243e-06, "loss": 88.6417, "step": 78030 }, { "epoch": 0.6455722380775117, "grad_norm": 700.50390625, "learning_rate": 3.611309600599253e-06, "loss": 97.1519, "step": 78040 }, { "epoch": 0.6456549613268809, "grad_norm": 1007.422607421875, "learning_rate": 3.6099547770920046e-06, "loss": 96.4095, "step": 78050 }, { "epoch": 0.6457376845762501, "grad_norm": 833.5045776367188, "learning_rate": 3.6086000641842757e-06, "loss": 90.0102, "step": 78060 }, { "epoch": 0.6458204078256193, "grad_norm": 593.3013916015625, "learning_rate": 3.6072454619838525e-06, "loss": 89.5628, "step": 78070 }, { "epoch": 0.6459031310749886, "grad_norm": 937.5786743164062, "learning_rate": 3.6058909705985166e-06, "loss": 93.1092, "step": 78080 }, { "epoch": 0.6459858543243578, "grad_norm": 1777.1658935546875, "learning_rate": 3.6045365901360385e-06, "loss": 75.8696, "step": 78090 }, { "epoch": 0.646068577573727, "grad_norm": 819.875732421875, "learning_rate": 3.603182320704179e-06, "loss": 77.7278, "step": 78100 }, { "epoch": 0.6461513008230964, "grad_norm": 661.6198120117188, "learning_rate": 3.601828162410691e-06, "loss": 108.2391, "step": 78110 }, { "epoch": 0.6462340240724656, "grad_norm": 1499.3050537109375, "learning_rate": 3.6004741153633194e-06, "loss": 92.8364, "step": 78120 }, { "epoch": 0.6463167473218348, "grad_norm": 854.0673828125, "learning_rate": 3.5991201796698006e-06, "loss": 108.2219, "step": 78130 }, { "epoch": 0.6463994705712041, "grad_norm": 1086.9384765625, "learning_rate": 3.5977663554378594e-06, "loss": 84.7707, "step": 78140 }, { "epoch": 0.6464821938205733, "grad_norm": 1125.5399169921875, "learning_rate": 3.5964126427752155e-06, "loss": 109.9038, "step": 78150 }, { "epoch": 0.6465649170699425, "grad_norm": 1124.92919921875, "learning_rate": 3.595059041789575e-06, "loss": 101.9598, "step": 78160 }, { "epoch": 0.6466476403193118, "grad_norm": 868.62255859375, "learning_rate": 3.5937055525886377e-06, "loss": 80.6028, "step": 78170 }, { "epoch": 0.646730363568681, "grad_norm": 605.7478637695312, "learning_rate": 3.592352175280096e-06, "loss": 91.8966, "step": 78180 }, { "epoch": 0.6468130868180502, "grad_norm": 630.140380859375, "learning_rate": 3.5909989099716325e-06, "loss": 91.8721, "step": 78190 }, { "epoch": 0.6468958100674195, "grad_norm": 599.5414428710938, "learning_rate": 3.589645756770918e-06, "loss": 73.2456, "step": 78200 }, { "epoch": 0.6469785333167887, "grad_norm": 549.9411010742188, "learning_rate": 3.5882927157856175e-06, "loss": 76.2016, "step": 78210 }, { "epoch": 0.6470612565661579, "grad_norm": 965.70947265625, "learning_rate": 3.586939787123388e-06, "loss": 89.8131, "step": 78220 }, { "epoch": 0.6471439798155272, "grad_norm": 705.2805786132812, "learning_rate": 3.585586970891876e-06, "loss": 85.9309, "step": 78230 }, { "epoch": 0.6472267030648964, "grad_norm": 539.7587890625, "learning_rate": 3.584234267198715e-06, "loss": 83.7423, "step": 78240 }, { "epoch": 0.6473094263142656, "grad_norm": 904.0587768554688, "learning_rate": 3.582881676151536e-06, "loss": 102.2972, "step": 78250 }, { "epoch": 0.6473921495636349, "grad_norm": 1084.20263671875, "learning_rate": 3.581529197857959e-06, "loss": 72.5396, "step": 78260 }, { "epoch": 0.6474748728130041, "grad_norm": 1199.1031494140625, "learning_rate": 3.580176832425594e-06, "loss": 96.1683, "step": 78270 }, { "epoch": 0.6475575960623733, "grad_norm": 1210.6788330078125, "learning_rate": 3.5788245799620425e-06, "loss": 103.4515, "step": 78280 }, { "epoch": 0.6476403193117426, "grad_norm": 670.8250122070312, "learning_rate": 3.577472440574896e-06, "loss": 80.9642, "step": 78290 }, { "epoch": 0.6477230425611118, "grad_norm": 965.8837890625, "learning_rate": 3.5761204143717387e-06, "loss": 61.6829, "step": 78300 }, { "epoch": 0.647805765810481, "grad_norm": 566.632568359375, "learning_rate": 3.5747685014601456e-06, "loss": 104.7201, "step": 78310 }, { "epoch": 0.6478884890598503, "grad_norm": 464.7363586425781, "learning_rate": 3.5734167019476845e-06, "loss": 90.921, "step": 78320 }, { "epoch": 0.6479712123092195, "grad_norm": 498.57763671875, "learning_rate": 3.572065015941907e-06, "loss": 85.318, "step": 78330 }, { "epoch": 0.6480539355585887, "grad_norm": 826.5978393554688, "learning_rate": 3.570713443550362e-06, "loss": 94.5103, "step": 78340 }, { "epoch": 0.648136658807958, "grad_norm": 807.204345703125, "learning_rate": 3.5693619848805892e-06, "loss": 90.0785, "step": 78350 }, { "epoch": 0.6482193820573272, "grad_norm": 774.7725219726562, "learning_rate": 3.568010640040118e-06, "loss": 70.4705, "step": 78360 }, { "epoch": 0.6483021053066964, "grad_norm": 504.8932189941406, "learning_rate": 3.566659409136468e-06, "loss": 90.6975, "step": 78370 }, { "epoch": 0.6483848285560657, "grad_norm": 746.927978515625, "learning_rate": 3.565308292277151e-06, "loss": 116.9276, "step": 78380 }, { "epoch": 0.6484675518054349, "grad_norm": 912.6074829101562, "learning_rate": 3.563957289569669e-06, "loss": 78.5412, "step": 78390 }, { "epoch": 0.6485502750548041, "grad_norm": 737.116455078125, "learning_rate": 3.5626064011215135e-06, "loss": 76.1263, "step": 78400 }, { "epoch": 0.6486329983041734, "grad_norm": 932.8018188476562, "learning_rate": 3.5612556270401733e-06, "loss": 84.2955, "step": 78410 }, { "epoch": 0.6487157215535426, "grad_norm": 862.2860717773438, "learning_rate": 3.5599049674331175e-06, "loss": 102.538, "step": 78420 }, { "epoch": 0.6487984448029118, "grad_norm": 1257.6517333984375, "learning_rate": 3.5585544224078143e-06, "loss": 101.6207, "step": 78430 }, { "epoch": 0.6488811680522811, "grad_norm": 948.5725708007812, "learning_rate": 3.5572039920717192e-06, "loss": 79.6746, "step": 78440 }, { "epoch": 0.6489638913016503, "grad_norm": 1046.1217041015625, "learning_rate": 3.5558536765322825e-06, "loss": 113.3602, "step": 78450 }, { "epoch": 0.6490466145510195, "grad_norm": 915.197509765625, "learning_rate": 3.554503475896941e-06, "loss": 97.8924, "step": 78460 }, { "epoch": 0.6491293378003888, "grad_norm": 1135.54248046875, "learning_rate": 3.553153390273124e-06, "loss": 69.3663, "step": 78470 }, { "epoch": 0.649212061049758, "grad_norm": 998.7444458007812, "learning_rate": 3.551803419768251e-06, "loss": 73.8026, "step": 78480 }, { "epoch": 0.6492947842991272, "grad_norm": 949.7462158203125, "learning_rate": 3.5504535644897352e-06, "loss": 105.2546, "step": 78490 }, { "epoch": 0.6493775075484965, "grad_norm": 992.8348388671875, "learning_rate": 3.549103824544975e-06, "loss": 106.6997, "step": 78500 }, { "epoch": 0.6494602307978657, "grad_norm": 748.7544555664062, "learning_rate": 3.5477542000413657e-06, "loss": 85.0411, "step": 78510 }, { "epoch": 0.6495429540472349, "grad_norm": 1108.8211669921875, "learning_rate": 3.546404691086289e-06, "loss": 102.6315, "step": 78520 }, { "epoch": 0.6496256772966043, "grad_norm": 552.9169311523438, "learning_rate": 3.5450552977871207e-06, "loss": 83.2827, "step": 78530 }, { "epoch": 0.6497084005459735, "grad_norm": 615.8543090820312, "learning_rate": 3.543706020251223e-06, "loss": 93.9779, "step": 78540 }, { "epoch": 0.6497911237953427, "grad_norm": 740.286376953125, "learning_rate": 3.542356858585956e-06, "loss": 85.2819, "step": 78550 }, { "epoch": 0.649873847044712, "grad_norm": 898.7952270507812, "learning_rate": 3.541007812898663e-06, "loss": 105.5582, "step": 78560 }, { "epoch": 0.6499565702940812, "grad_norm": 719.4003295898438, "learning_rate": 3.539658883296683e-06, "loss": 73.1953, "step": 78570 }, { "epoch": 0.6500392935434504, "grad_norm": 1007.62109375, "learning_rate": 3.5383100698873446e-06, "loss": 103.7223, "step": 78580 }, { "epoch": 0.6501220167928197, "grad_norm": 1619.2178955078125, "learning_rate": 3.536961372777965e-06, "loss": 125.7312, "step": 78590 }, { "epoch": 0.6502047400421889, "grad_norm": 796.3026123046875, "learning_rate": 3.535612792075854e-06, "loss": 86.965, "step": 78600 }, { "epoch": 0.6502874632915581, "grad_norm": 686.6871337890625, "learning_rate": 3.5342643278883127e-06, "loss": 67.3191, "step": 78610 }, { "epoch": 0.6503701865409274, "grad_norm": 950.4487915039062, "learning_rate": 3.532915980322632e-06, "loss": 76.5455, "step": 78620 }, { "epoch": 0.6504529097902966, "grad_norm": 750.7484741210938, "learning_rate": 3.5315677494860923e-06, "loss": 94.5387, "step": 78630 }, { "epoch": 0.6505356330396658, "grad_norm": 599.2245483398438, "learning_rate": 3.5302196354859693e-06, "loss": 97.6871, "step": 78640 }, { "epoch": 0.6506183562890351, "grad_norm": 915.01123046875, "learning_rate": 3.528871638429524e-06, "loss": 84.8775, "step": 78650 }, { "epoch": 0.6507010795384043, "grad_norm": 1431.2159423828125, "learning_rate": 3.527523758424013e-06, "loss": 101.8159, "step": 78660 }, { "epoch": 0.6507838027877735, "grad_norm": 907.7205810546875, "learning_rate": 3.526175995576676e-06, "loss": 92.8583, "step": 78670 }, { "epoch": 0.6508665260371428, "grad_norm": 685.8528442382812, "learning_rate": 3.524828349994752e-06, "loss": 108.4734, "step": 78680 }, { "epoch": 0.650949249286512, "grad_norm": 1018.92919921875, "learning_rate": 3.523480821785466e-06, "loss": 116.4635, "step": 78690 }, { "epoch": 0.6510319725358812, "grad_norm": 616.6637573242188, "learning_rate": 3.5221334110560345e-06, "loss": 70.0087, "step": 78700 }, { "epoch": 0.6511146957852505, "grad_norm": 1268.9815673828125, "learning_rate": 3.5207861179136654e-06, "loss": 87.3583, "step": 78710 }, { "epoch": 0.6511974190346197, "grad_norm": 787.7352905273438, "learning_rate": 3.519438942465556e-06, "loss": 79.1112, "step": 78720 }, { "epoch": 0.6512801422839889, "grad_norm": 756.2986450195312, "learning_rate": 3.5180918848188937e-06, "loss": 89.4581, "step": 78730 }, { "epoch": 0.6513628655333582, "grad_norm": 439.9861145019531, "learning_rate": 3.516744945080861e-06, "loss": 82.7982, "step": 78740 }, { "epoch": 0.6514455887827274, "grad_norm": 702.6160278320312, "learning_rate": 3.5153981233586277e-06, "loss": 87.4703, "step": 78750 }, { "epoch": 0.6515283120320966, "grad_norm": 973.2195434570312, "learning_rate": 3.5140514197593494e-06, "loss": 105.3042, "step": 78760 }, { "epoch": 0.6516110352814659, "grad_norm": 547.9530639648438, "learning_rate": 3.512704834390179e-06, "loss": 90.783, "step": 78770 }, { "epoch": 0.6516937585308351, "grad_norm": 734.762939453125, "learning_rate": 3.5113583673582613e-06, "loss": 93.9857, "step": 78780 }, { "epoch": 0.6517764817802043, "grad_norm": 730.4273681640625, "learning_rate": 3.510012018770726e-06, "loss": 96.3544, "step": 78790 }, { "epoch": 0.6518592050295735, "grad_norm": 1371.4984130859375, "learning_rate": 3.508665788734696e-06, "loss": 97.4626, "step": 78800 }, { "epoch": 0.6519419282789428, "grad_norm": 974.1882934570312, "learning_rate": 3.507319677357285e-06, "loss": 106.0504, "step": 78810 }, { "epoch": 0.652024651528312, "grad_norm": 973.0641479492188, "learning_rate": 3.5059736847455967e-06, "loss": 85.558, "step": 78820 }, { "epoch": 0.6521073747776812, "grad_norm": 1314.4976806640625, "learning_rate": 3.5046278110067242e-06, "loss": 122.3348, "step": 78830 }, { "epoch": 0.6521900980270505, "grad_norm": 687.9398803710938, "learning_rate": 3.5032820562477577e-06, "loss": 150.5666, "step": 78840 }, { "epoch": 0.6522728212764197, "grad_norm": 1185.294189453125, "learning_rate": 3.5019364205757667e-06, "loss": 89.5953, "step": 78850 }, { "epoch": 0.6523555445257889, "grad_norm": 736.0501708984375, "learning_rate": 3.5005909040978188e-06, "loss": 88.9674, "step": 78860 }, { "epoch": 0.6524382677751582, "grad_norm": 644.609619140625, "learning_rate": 3.4992455069209717e-06, "loss": 84.699, "step": 78870 }, { "epoch": 0.6525209910245274, "grad_norm": 954.2164306640625, "learning_rate": 3.4979002291522723e-06, "loss": 128.6724, "step": 78880 }, { "epoch": 0.6526037142738966, "grad_norm": 750.8978881835938, "learning_rate": 3.4965550708987583e-06, "loss": 71.4564, "step": 78890 }, { "epoch": 0.6526864375232659, "grad_norm": 927.7744140625, "learning_rate": 3.4952100322674574e-06, "loss": 93.2375, "step": 78900 }, { "epoch": 0.6527691607726351, "grad_norm": 882.0057983398438, "learning_rate": 3.4938651133653877e-06, "loss": 89.3872, "step": 78910 }, { "epoch": 0.6528518840220043, "grad_norm": 909.5263671875, "learning_rate": 3.49252031429956e-06, "loss": 90.456, "step": 78920 }, { "epoch": 0.6529346072713736, "grad_norm": 810.2213134765625, "learning_rate": 3.4911756351769722e-06, "loss": 93.9017, "step": 78930 }, { "epoch": 0.6530173305207428, "grad_norm": 510.4373474121094, "learning_rate": 3.4898310761046133e-06, "loss": 75.6976, "step": 78940 }, { "epoch": 0.653100053770112, "grad_norm": 876.5424194335938, "learning_rate": 3.4884866371894654e-06, "loss": 82.5301, "step": 78950 }, { "epoch": 0.6531827770194814, "grad_norm": 496.5128173828125, "learning_rate": 3.487142318538498e-06, "loss": 124.7649, "step": 78960 }, { "epoch": 0.6532655002688506, "grad_norm": 574.9053344726562, "learning_rate": 3.4857981202586742e-06, "loss": 117.4653, "step": 78970 }, { "epoch": 0.6533482235182198, "grad_norm": 619.2501831054688, "learning_rate": 3.4844540424569453e-06, "loss": 75.0704, "step": 78980 }, { "epoch": 0.6534309467675891, "grad_norm": 502.5335693359375, "learning_rate": 3.483110085240252e-06, "loss": 117.5442, "step": 78990 }, { "epoch": 0.6535136700169583, "grad_norm": 802.0072631835938, "learning_rate": 3.481766248715528e-06, "loss": 100.3333, "step": 79000 }, { "epoch": 0.6535963932663275, "grad_norm": 762.8905639648438, "learning_rate": 3.4804225329896963e-06, "loss": 100.5814, "step": 79010 }, { "epoch": 0.6536791165156968, "grad_norm": 732.5484619140625, "learning_rate": 3.4790789381696686e-06, "loss": 110.0594, "step": 79020 }, { "epoch": 0.653761839765066, "grad_norm": 865.77734375, "learning_rate": 3.4777354643623506e-06, "loss": 80.9146, "step": 79030 }, { "epoch": 0.6538445630144352, "grad_norm": 957.7134399414062, "learning_rate": 3.4763921116746352e-06, "loss": 93.2645, "step": 79040 }, { "epoch": 0.6539272862638045, "grad_norm": 900.92041015625, "learning_rate": 3.475048880213407e-06, "loss": 62.1217, "step": 79050 }, { "epoch": 0.6540100095131737, "grad_norm": 544.1870727539062, "learning_rate": 3.473705770085539e-06, "loss": 93.6251, "step": 79060 }, { "epoch": 0.6540927327625429, "grad_norm": 1076.5374755859375, "learning_rate": 3.4723627813979005e-06, "loss": 113.8579, "step": 79070 }, { "epoch": 0.6541754560119122, "grad_norm": 862.5486450195312, "learning_rate": 3.471019914257344e-06, "loss": 94.458, "step": 79080 }, { "epoch": 0.6542581792612814, "grad_norm": 637.738037109375, "learning_rate": 3.4696771687707176e-06, "loss": 86.9744, "step": 79090 }, { "epoch": 0.6543409025106506, "grad_norm": 979.8980712890625, "learning_rate": 3.468334545044853e-06, "loss": 89.8826, "step": 79100 }, { "epoch": 0.6544236257600199, "grad_norm": 1138.6566162109375, "learning_rate": 3.46699204318658e-06, "loss": 93.8205, "step": 79110 }, { "epoch": 0.6545063490093891, "grad_norm": 680.276611328125, "learning_rate": 3.465649663302715e-06, "loss": 87.8101, "step": 79120 }, { "epoch": 0.6545890722587583, "grad_norm": 803.8458251953125, "learning_rate": 3.464307405500064e-06, "loss": 99.0789, "step": 79130 }, { "epoch": 0.6546717955081276, "grad_norm": 1364.3984375, "learning_rate": 3.4629652698854254e-06, "loss": 92.8128, "step": 79140 }, { "epoch": 0.6547545187574968, "grad_norm": 751.8421630859375, "learning_rate": 3.461623256565586e-06, "loss": 78.3212, "step": 79150 }, { "epoch": 0.654837242006866, "grad_norm": 862.8961181640625, "learning_rate": 3.4602813656473223e-06, "loss": 94.2935, "step": 79160 }, { "epoch": 0.6549199652562353, "grad_norm": 1315.6492919921875, "learning_rate": 3.4589395972374055e-06, "loss": 94.4292, "step": 79170 }, { "epoch": 0.6550026885056045, "grad_norm": 714.10546875, "learning_rate": 3.457597951442595e-06, "loss": 61.3309, "step": 79180 }, { "epoch": 0.6550854117549737, "grad_norm": 2307.056640625, "learning_rate": 3.456256428369633e-06, "loss": 95.6213, "step": 79190 }, { "epoch": 0.655168135004343, "grad_norm": 1832.7984619140625, "learning_rate": 3.4549150281252635e-06, "loss": 94.4903, "step": 79200 }, { "epoch": 0.6552508582537122, "grad_norm": 0.0, "learning_rate": 3.453573750816214e-06, "loss": 69.4073, "step": 79210 }, { "epoch": 0.6553335815030814, "grad_norm": 550.095947265625, "learning_rate": 3.452232596549204e-06, "loss": 83.0661, "step": 79220 }, { "epoch": 0.6554163047524507, "grad_norm": 833.4959716796875, "learning_rate": 3.4508915654309438e-06, "loss": 93.9101, "step": 79230 }, { "epoch": 0.6554990280018199, "grad_norm": 584.6357421875, "learning_rate": 3.4495506575681313e-06, "loss": 89.3119, "step": 79240 }, { "epoch": 0.6555817512511891, "grad_norm": 557.8964233398438, "learning_rate": 3.4482098730674577e-06, "loss": 78.8267, "step": 79250 }, { "epoch": 0.6556644745005584, "grad_norm": 1147.6644287109375, "learning_rate": 3.4468692120356017e-06, "loss": 115.0388, "step": 79260 }, { "epoch": 0.6557471977499276, "grad_norm": 1800.31494140625, "learning_rate": 3.4455286745792383e-06, "loss": 90.1449, "step": 79270 }, { "epoch": 0.6558299209992968, "grad_norm": 1146.2457275390625, "learning_rate": 3.4441882608050216e-06, "loss": 99.0781, "step": 79280 }, { "epoch": 0.6559126442486661, "grad_norm": 784.1929931640625, "learning_rate": 3.442847970819604e-06, "loss": 86.3861, "step": 79290 }, { "epoch": 0.6559953674980353, "grad_norm": 1415.5955810546875, "learning_rate": 3.441507804729627e-06, "loss": 112.1447, "step": 79300 }, { "epoch": 0.6560780907474045, "grad_norm": 643.2744140625, "learning_rate": 3.440167762641722e-06, "loss": 97.26, "step": 79310 }, { "epoch": 0.6561608139967738, "grad_norm": 990.8764038085938, "learning_rate": 3.43882784466251e-06, "loss": 70.3671, "step": 79320 }, { "epoch": 0.656243537246143, "grad_norm": 1008.8643188476562, "learning_rate": 3.4374880508986013e-06, "loss": 106.8238, "step": 79330 }, { "epoch": 0.6563262604955122, "grad_norm": 616.8389892578125, "learning_rate": 3.436148381456598e-06, "loss": 88.8686, "step": 79340 }, { "epoch": 0.6564089837448815, "grad_norm": 1004.9009399414062, "learning_rate": 3.434808836443091e-06, "loss": 79.8238, "step": 79350 }, { "epoch": 0.6564917069942507, "grad_norm": 1268.065673828125, "learning_rate": 3.4334694159646608e-06, "loss": 86.7182, "step": 79360 }, { "epoch": 0.6565744302436199, "grad_norm": 1004.0042724609375, "learning_rate": 3.43213012012788e-06, "loss": 108.0468, "step": 79370 }, { "epoch": 0.6566571534929893, "grad_norm": 563.8744506835938, "learning_rate": 3.43079094903931e-06, "loss": 74.4405, "step": 79380 }, { "epoch": 0.6567398767423585, "grad_norm": 410.0007019042969, "learning_rate": 3.4294519028055014e-06, "loss": 89.0153, "step": 79390 }, { "epoch": 0.6568225999917277, "grad_norm": 816.875, "learning_rate": 3.428112981532998e-06, "loss": 66.4603, "step": 79400 }, { "epoch": 0.656905323241097, "grad_norm": 665.1447143554688, "learning_rate": 3.4267741853283305e-06, "loss": 123.0643, "step": 79410 }, { "epoch": 0.6569880464904662, "grad_norm": 1130.827392578125, "learning_rate": 3.425435514298021e-06, "loss": 113.6049, "step": 79420 }, { "epoch": 0.6570707697398354, "grad_norm": 933.1907348632812, "learning_rate": 3.4240969685485813e-06, "loss": 85.3566, "step": 79430 }, { "epoch": 0.6571534929892047, "grad_norm": 930.4938354492188, "learning_rate": 3.422758548186515e-06, "loss": 110.3764, "step": 79440 }, { "epoch": 0.6572362162385739, "grad_norm": 698.0180053710938, "learning_rate": 3.4214202533183104e-06, "loss": 89.8429, "step": 79450 }, { "epoch": 0.6573189394879431, "grad_norm": 949.0037841796875, "learning_rate": 3.420082084050453e-06, "loss": 125.2701, "step": 79460 }, { "epoch": 0.6574016627373124, "grad_norm": 1515.08349609375, "learning_rate": 3.4187440404894123e-06, "loss": 111.6007, "step": 79470 }, { "epoch": 0.6574843859866816, "grad_norm": 531.5831298828125, "learning_rate": 3.417406122741651e-06, "loss": 115.491, "step": 79480 }, { "epoch": 0.6575671092360508, "grad_norm": 951.4327392578125, "learning_rate": 3.416068330913621e-06, "loss": 77.1653, "step": 79490 }, { "epoch": 0.6576498324854201, "grad_norm": 1479.0516357421875, "learning_rate": 3.4147306651117663e-06, "loss": 102.2539, "step": 79500 }, { "epoch": 0.6577325557347893, "grad_norm": 1768.2347412109375, "learning_rate": 3.4133931254425156e-06, "loss": 139.0158, "step": 79510 }, { "epoch": 0.6578152789841585, "grad_norm": 519.06103515625, "learning_rate": 3.4120557120122944e-06, "loss": 103.6259, "step": 79520 }, { "epoch": 0.6578980022335277, "grad_norm": 923.902587890625, "learning_rate": 3.4107184249275114e-06, "loss": 69.9052, "step": 79530 }, { "epoch": 0.657980725482897, "grad_norm": 812.4449462890625, "learning_rate": 3.4093812642945694e-06, "loss": 82.5353, "step": 79540 }, { "epoch": 0.6580634487322662, "grad_norm": 669.1686401367188, "learning_rate": 3.40804423021986e-06, "loss": 75.8008, "step": 79550 }, { "epoch": 0.6581461719816354, "grad_norm": 556.7330322265625, "learning_rate": 3.4067073228097656e-06, "loss": 79.9305, "step": 79560 }, { "epoch": 0.6582288952310047, "grad_norm": 828.5440673828125, "learning_rate": 3.4053705421706574e-06, "loss": 119.8367, "step": 79570 }, { "epoch": 0.6583116184803739, "grad_norm": 982.3250732421875, "learning_rate": 3.4040338884088955e-06, "loss": 103.1818, "step": 79580 }, { "epoch": 0.6583943417297431, "grad_norm": 556.98193359375, "learning_rate": 3.4026973616308334e-06, "loss": 127.7505, "step": 79590 }, { "epoch": 0.6584770649791124, "grad_norm": 930.4917602539062, "learning_rate": 3.401360961942812e-06, "loss": 85.2934, "step": 79600 }, { "epoch": 0.6585597882284816, "grad_norm": 1134.147705078125, "learning_rate": 3.4000246894511634e-06, "loss": 70.3682, "step": 79610 }, { "epoch": 0.6586425114778508, "grad_norm": 524.334228515625, "learning_rate": 3.398688544262205e-06, "loss": 74.7115, "step": 79620 }, { "epoch": 0.6587252347272201, "grad_norm": 1209.0157470703125, "learning_rate": 3.397352526482251e-06, "loss": 86.3904, "step": 79630 }, { "epoch": 0.6588079579765893, "grad_norm": 833.2462768554688, "learning_rate": 3.396016636217601e-06, "loss": 103.8662, "step": 79640 }, { "epoch": 0.6588906812259585, "grad_norm": 1902.74951171875, "learning_rate": 3.394680873574546e-06, "loss": 89.4633, "step": 79650 }, { "epoch": 0.6589734044753278, "grad_norm": 1193.58740234375, "learning_rate": 3.3933452386593666e-06, "loss": 83.5736, "step": 79660 }, { "epoch": 0.659056127724697, "grad_norm": 602.8118896484375, "learning_rate": 3.392009731578334e-06, "loss": 115.8359, "step": 79670 }, { "epoch": 0.6591388509740662, "grad_norm": 744.4619140625, "learning_rate": 3.3906743524377053e-06, "loss": 78.6802, "step": 79680 }, { "epoch": 0.6592215742234355, "grad_norm": 1028.5284423828125, "learning_rate": 3.3893391013437338e-06, "loss": 82.8485, "step": 79690 }, { "epoch": 0.6593042974728047, "grad_norm": 1039.501708984375, "learning_rate": 3.38800397840266e-06, "loss": 97.3438, "step": 79700 }, { "epoch": 0.6593870207221739, "grad_norm": 1091.6658935546875, "learning_rate": 3.3866689837207094e-06, "loss": 103.66, "step": 79710 }, { "epoch": 0.6594697439715432, "grad_norm": 1788.04345703125, "learning_rate": 3.3853341174041025e-06, "loss": 97.2377, "step": 79720 }, { "epoch": 0.6595524672209124, "grad_norm": 842.3888549804688, "learning_rate": 3.3839993795590507e-06, "loss": 101.914, "step": 79730 }, { "epoch": 0.6596351904702816, "grad_norm": 1032.7957763671875, "learning_rate": 3.3826647702917526e-06, "loss": 83.7671, "step": 79740 }, { "epoch": 0.6597179137196509, "grad_norm": 910.4108276367188, "learning_rate": 3.3813302897083955e-06, "loss": 114.1942, "step": 79750 }, { "epoch": 0.6598006369690201, "grad_norm": 710.4639282226562, "learning_rate": 3.379995937915158e-06, "loss": 84.6564, "step": 79760 }, { "epoch": 0.6598833602183893, "grad_norm": 667.979248046875, "learning_rate": 3.37866171501821e-06, "loss": 77.0747, "step": 79770 }, { "epoch": 0.6599660834677586, "grad_norm": 1670.4046630859375, "learning_rate": 3.3773276211237087e-06, "loss": 120.1049, "step": 79780 }, { "epoch": 0.6600488067171278, "grad_norm": 613.4122314453125, "learning_rate": 3.3759936563378004e-06, "loss": 83.0683, "step": 79790 }, { "epoch": 0.660131529966497, "grad_norm": 902.4213256835938, "learning_rate": 3.374659820766625e-06, "loss": 99.4036, "step": 79800 }, { "epoch": 0.6602142532158664, "grad_norm": 841.2720336914062, "learning_rate": 3.3733261145163064e-06, "loss": 112.4562, "step": 79810 }, { "epoch": 0.6602969764652356, "grad_norm": 609.0618286132812, "learning_rate": 3.371992537692964e-06, "loss": 95.7773, "step": 79820 }, { "epoch": 0.6603796997146048, "grad_norm": 1067.176025390625, "learning_rate": 3.370659090402704e-06, "loss": 115.8151, "step": 79830 }, { "epoch": 0.6604624229639741, "grad_norm": 1055.511474609375, "learning_rate": 3.3693257727516227e-06, "loss": 96.7919, "step": 79840 }, { "epoch": 0.6605451462133433, "grad_norm": 510.99395751953125, "learning_rate": 3.367992584845806e-06, "loss": 76.2172, "step": 79850 }, { "epoch": 0.6606278694627125, "grad_norm": 407.9087219238281, "learning_rate": 3.3666595267913293e-06, "loss": 86.5948, "step": 79860 }, { "epoch": 0.6607105927120818, "grad_norm": 1010.9825439453125, "learning_rate": 3.365326598694259e-06, "loss": 80.669, "step": 79870 }, { "epoch": 0.660793315961451, "grad_norm": 707.4743041992188, "learning_rate": 3.3639938006606483e-06, "loss": 89.7645, "step": 79880 }, { "epoch": 0.6608760392108202, "grad_norm": 1092.1805419921875, "learning_rate": 3.3626611327965418e-06, "loss": 103.0097, "step": 79890 }, { "epoch": 0.6609587624601895, "grad_norm": 530.4946899414062, "learning_rate": 3.3613285952079754e-06, "loss": 87.2853, "step": 79900 }, { "epoch": 0.6610414857095587, "grad_norm": 750.7198486328125, "learning_rate": 3.3599961880009713e-06, "loss": 80.3749, "step": 79910 }, { "epoch": 0.6611242089589279, "grad_norm": 728.5155029296875, "learning_rate": 3.3586639112815446e-06, "loss": 104.2869, "step": 79920 }, { "epoch": 0.6612069322082972, "grad_norm": 1172.182373046875, "learning_rate": 3.357331765155698e-06, "loss": 76.1472, "step": 79930 }, { "epoch": 0.6612896554576664, "grad_norm": 1009.7491455078125, "learning_rate": 3.355999749729424e-06, "loss": 83.3518, "step": 79940 }, { "epoch": 0.6613723787070356, "grad_norm": 865.0538940429688, "learning_rate": 3.354667865108706e-06, "loss": 121.849, "step": 79950 }, { "epoch": 0.6614551019564049, "grad_norm": 635.9669189453125, "learning_rate": 3.353336111399513e-06, "loss": 103.0901, "step": 79960 }, { "epoch": 0.6615378252057741, "grad_norm": 3667.30615234375, "learning_rate": 3.3520044887078096e-06, "loss": 148.1079, "step": 79970 }, { "epoch": 0.6616205484551433, "grad_norm": 1349.1805419921875, "learning_rate": 3.350672997139546e-06, "loss": 102.0127, "step": 79980 }, { "epoch": 0.6617032717045126, "grad_norm": 795.7191162109375, "learning_rate": 3.3493416368006614e-06, "loss": 75.6753, "step": 79990 }, { "epoch": 0.6617859949538818, "grad_norm": 692.658935546875, "learning_rate": 3.348010407797088e-06, "loss": 82.7135, "step": 80000 }, { "epoch": 0.661868718203251, "grad_norm": 1096.899169921875, "learning_rate": 3.346679310234744e-06, "loss": 79.7514, "step": 80010 }, { "epoch": 0.6619514414526203, "grad_norm": 1015.4619140625, "learning_rate": 3.34534834421954e-06, "loss": 59.5193, "step": 80020 }, { "epoch": 0.6620341647019895, "grad_norm": 537.7772216796875, "learning_rate": 3.3440175098573748e-06, "loss": 87.1953, "step": 80030 }, { "epoch": 0.6621168879513587, "grad_norm": 721.2218627929688, "learning_rate": 3.3426868072541386e-06, "loss": 81.0099, "step": 80040 }, { "epoch": 0.662199611200728, "grad_norm": 862.6383666992188, "learning_rate": 3.3413562365157037e-06, "loss": 82.9833, "step": 80050 }, { "epoch": 0.6622823344500972, "grad_norm": 879.6834106445312, "learning_rate": 3.340025797747942e-06, "loss": 72.3241, "step": 80060 }, { "epoch": 0.6623650576994664, "grad_norm": 831.2039184570312, "learning_rate": 3.3386954910567094e-06, "loss": 66.0481, "step": 80070 }, { "epoch": 0.6624477809488357, "grad_norm": 699.9284057617188, "learning_rate": 3.337365316547852e-06, "loss": 98.672, "step": 80080 }, { "epoch": 0.6625305041982049, "grad_norm": 831.2557983398438, "learning_rate": 3.336035274327206e-06, "loss": 99.8643, "step": 80090 }, { "epoch": 0.6626132274475741, "grad_norm": 1425.111328125, "learning_rate": 3.3347053645005965e-06, "loss": 82.7985, "step": 80100 }, { "epoch": 0.6626959506969434, "grad_norm": 794.6343994140625, "learning_rate": 3.333375587173838e-06, "loss": 107.7234, "step": 80110 }, { "epoch": 0.6627786739463126, "grad_norm": 481.92596435546875, "learning_rate": 3.332045942452738e-06, "loss": 62.8261, "step": 80120 }, { "epoch": 0.6628613971956818, "grad_norm": 651.9033813476562, "learning_rate": 3.330716430443085e-06, "loss": 76.272, "step": 80130 }, { "epoch": 0.6629441204450511, "grad_norm": 692.5248413085938, "learning_rate": 3.329387051250664e-06, "loss": 92.6327, "step": 80140 }, { "epoch": 0.6630268436944203, "grad_norm": 537.0725708007812, "learning_rate": 3.3280578049812493e-06, "loss": 73.6244, "step": 80150 }, { "epoch": 0.6631095669437895, "grad_norm": 891.6332397460938, "learning_rate": 3.3267286917406027e-06, "loss": 67.5265, "step": 80160 }, { "epoch": 0.6631922901931588, "grad_norm": 662.6693115234375, "learning_rate": 3.3253997116344737e-06, "loss": 102.1911, "step": 80170 }, { "epoch": 0.663275013442528, "grad_norm": 1005.4918823242188, "learning_rate": 3.3240708647686047e-06, "loss": 76.6256, "step": 80180 }, { "epoch": 0.6633577366918972, "grad_norm": 629.626953125, "learning_rate": 3.322742151248726e-06, "loss": 87.3736, "step": 80190 }, { "epoch": 0.6634404599412665, "grad_norm": 760.7479858398438, "learning_rate": 3.3214135711805555e-06, "loss": 96.766, "step": 80200 }, { "epoch": 0.6635231831906357, "grad_norm": 556.74365234375, "learning_rate": 3.3200851246698053e-06, "loss": 77.8871, "step": 80210 }, { "epoch": 0.663605906440005, "grad_norm": 645.0292358398438, "learning_rate": 3.318756811822171e-06, "loss": 74.2956, "step": 80220 }, { "epoch": 0.6636886296893741, "grad_norm": 1107.0675048828125, "learning_rate": 3.3174286327433408e-06, "loss": 87.9465, "step": 80230 }, { "epoch": 0.6637713529387435, "grad_norm": 770.3927001953125, "learning_rate": 3.3161005875389916e-06, "loss": 91.9657, "step": 80240 }, { "epoch": 0.6638540761881127, "grad_norm": 1054.7138671875, "learning_rate": 3.3147726763147913e-06, "loss": 84.3892, "step": 80250 }, { "epoch": 0.6639367994374819, "grad_norm": 1000.5108032226562, "learning_rate": 3.3134448991763957e-06, "loss": 101.0391, "step": 80260 }, { "epoch": 0.6640195226868512, "grad_norm": 588.8154296875, "learning_rate": 3.312117256229449e-06, "loss": 91.1146, "step": 80270 }, { "epoch": 0.6641022459362204, "grad_norm": 722.9697875976562, "learning_rate": 3.310789747579586e-06, "loss": 84.0725, "step": 80280 }, { "epoch": 0.6641849691855896, "grad_norm": 645.8081665039062, "learning_rate": 3.30946237333243e-06, "loss": 92.8005, "step": 80290 }, { "epoch": 0.6642676924349589, "grad_norm": 1031.3046875, "learning_rate": 3.308135133593595e-06, "loss": 108.4536, "step": 80300 }, { "epoch": 0.6643504156843281, "grad_norm": 596.3705444335938, "learning_rate": 3.3068080284686825e-06, "loss": 104.7899, "step": 80310 }, { "epoch": 0.6644331389336973, "grad_norm": 792.6683349609375, "learning_rate": 3.3054810580632844e-06, "loss": 100.9892, "step": 80320 }, { "epoch": 0.6645158621830666, "grad_norm": 803.8338623046875, "learning_rate": 3.304154222482982e-06, "loss": 72.7554, "step": 80330 }, { "epoch": 0.6645985854324358, "grad_norm": 1118.839111328125, "learning_rate": 3.3028275218333438e-06, "loss": 67.1966, "step": 80340 }, { "epoch": 0.664681308681805, "grad_norm": 929.019287109375, "learning_rate": 3.301500956219932e-06, "loss": 107.5598, "step": 80350 }, { "epoch": 0.6647640319311743, "grad_norm": 1357.971923828125, "learning_rate": 3.3001745257482935e-06, "loss": 130.7297, "step": 80360 }, { "epoch": 0.6648467551805435, "grad_norm": 299.3155212402344, "learning_rate": 3.2988482305239673e-06, "loss": 80.1713, "step": 80370 }, { "epoch": 0.6649294784299127, "grad_norm": 658.03125, "learning_rate": 3.2975220706524813e-06, "loss": 120.3528, "step": 80380 }, { "epoch": 0.665012201679282, "grad_norm": 1064.147216796875, "learning_rate": 3.2961960462393492e-06, "loss": 102.2007, "step": 80390 }, { "epoch": 0.6650949249286512, "grad_norm": 783.42431640625, "learning_rate": 3.2948701573900786e-06, "loss": 104.9912, "step": 80400 }, { "epoch": 0.6651776481780204, "grad_norm": 729.3176879882812, "learning_rate": 3.2935444042101646e-06, "loss": 95.9438, "step": 80410 }, { "epoch": 0.6652603714273897, "grad_norm": 819.0823974609375, "learning_rate": 3.29221878680509e-06, "loss": 118.4774, "step": 80420 }, { "epoch": 0.6653430946767589, "grad_norm": 698.5296020507812, "learning_rate": 3.2908933052803292e-06, "loss": 79.8682, "step": 80430 }, { "epoch": 0.6654258179261281, "grad_norm": 1220.9241943359375, "learning_rate": 3.2895679597413433e-06, "loss": 97.124, "step": 80440 }, { "epoch": 0.6655085411754974, "grad_norm": 1185.635009765625, "learning_rate": 3.2882427502935867e-06, "loss": 80.9898, "step": 80450 }, { "epoch": 0.6655912644248666, "grad_norm": 1041.6197509765625, "learning_rate": 3.2869176770424976e-06, "loss": 91.9751, "step": 80460 }, { "epoch": 0.6656739876742358, "grad_norm": 909.6505737304688, "learning_rate": 3.2855927400935085e-06, "loss": 83.0227, "step": 80470 }, { "epoch": 0.6657567109236051, "grad_norm": 978.8333740234375, "learning_rate": 3.2842679395520363e-06, "loss": 87.613, "step": 80480 }, { "epoch": 0.6658394341729743, "grad_norm": 1578.8958740234375, "learning_rate": 3.282943275523489e-06, "loss": 86.7915, "step": 80490 }, { "epoch": 0.6659221574223435, "grad_norm": 1016.648681640625, "learning_rate": 3.2816187481132655e-06, "loss": 89.614, "step": 80500 }, { "epoch": 0.6660048806717128, "grad_norm": 795.9924926757812, "learning_rate": 3.280294357426752e-06, "loss": 92.8654, "step": 80510 }, { "epoch": 0.666087603921082, "grad_norm": 486.5448913574219, "learning_rate": 3.2789701035693242e-06, "loss": 68.977, "step": 80520 }, { "epoch": 0.6661703271704512, "grad_norm": 1071.9464111328125, "learning_rate": 3.277645986646346e-06, "loss": 97.0568, "step": 80530 }, { "epoch": 0.6662530504198205, "grad_norm": 1014.2429809570312, "learning_rate": 3.276322006763172e-06, "loss": 111.9005, "step": 80540 }, { "epoch": 0.6663357736691897, "grad_norm": 571.8171997070312, "learning_rate": 3.274998164025148e-06, "loss": 73.2075, "step": 80550 }, { "epoch": 0.6664184969185589, "grad_norm": 1098.88720703125, "learning_rate": 3.2736744585376016e-06, "loss": 73.6912, "step": 80560 }, { "epoch": 0.6665012201679282, "grad_norm": 1065.5684814453125, "learning_rate": 3.2723508904058547e-06, "loss": 86.9936, "step": 80570 }, { "epoch": 0.6665839434172974, "grad_norm": 1688.9266357421875, "learning_rate": 3.27102745973522e-06, "loss": 95.265, "step": 80580 }, { "epoch": 0.6666666666666666, "grad_norm": 1830.2410888671875, "learning_rate": 3.269704166630995e-06, "loss": 107.0393, "step": 80590 }, { "epoch": 0.6667493899160359, "grad_norm": 662.8126831054688, "learning_rate": 3.268381011198468e-06, "loss": 81.1234, "step": 80600 }, { "epoch": 0.6668321131654051, "grad_norm": 780.1619873046875, "learning_rate": 3.2670579935429176e-06, "loss": 72.5676, "step": 80610 }, { "epoch": 0.6669148364147743, "grad_norm": 900.4564819335938, "learning_rate": 3.265735113769609e-06, "loss": 72.4489, "step": 80620 }, { "epoch": 0.6669975596641436, "grad_norm": 737.3795166015625, "learning_rate": 3.264412371983797e-06, "loss": 104.431, "step": 80630 }, { "epoch": 0.6670802829135128, "grad_norm": 705.3508911132812, "learning_rate": 3.2630897682907312e-06, "loss": 82.3283, "step": 80640 }, { "epoch": 0.667163006162882, "grad_norm": 1338.135498046875, "learning_rate": 3.261767302795639e-06, "loss": 102.0957, "step": 80650 }, { "epoch": 0.6672457294122514, "grad_norm": 1417.84375, "learning_rate": 3.2604449756037447e-06, "loss": 140.2835, "step": 80660 }, { "epoch": 0.6673284526616206, "grad_norm": 899.5838012695312, "learning_rate": 3.2591227868202592e-06, "loss": 106.4462, "step": 80670 }, { "epoch": 0.6674111759109898, "grad_norm": 1201.1510009765625, "learning_rate": 3.257800736550385e-06, "loss": 88.8766, "step": 80680 }, { "epoch": 0.6674938991603591, "grad_norm": 1101.5433349609375, "learning_rate": 3.2564788248993105e-06, "loss": 91.0437, "step": 80690 }, { "epoch": 0.6675766224097283, "grad_norm": 609.85400390625, "learning_rate": 3.2551570519722155e-06, "loss": 104.3016, "step": 80700 }, { "epoch": 0.6676593456590975, "grad_norm": 1198.7747802734375, "learning_rate": 3.2538354178742648e-06, "loss": 102.0451, "step": 80710 }, { "epoch": 0.6677420689084668, "grad_norm": 494.413818359375, "learning_rate": 3.2525139227106163e-06, "loss": 70.2176, "step": 80720 }, { "epoch": 0.667824792157836, "grad_norm": 614.208251953125, "learning_rate": 3.2511925665864164e-06, "loss": 67.1571, "step": 80730 }, { "epoch": 0.6679075154072052, "grad_norm": 730.7908935546875, "learning_rate": 3.2498713496067963e-06, "loss": 73.7568, "step": 80740 }, { "epoch": 0.6679902386565745, "grad_norm": 1220.75341796875, "learning_rate": 3.2485502718768814e-06, "loss": 103.8804, "step": 80750 }, { "epoch": 0.6680729619059437, "grad_norm": 909.8517456054688, "learning_rate": 3.2472293335017836e-06, "loss": 90.6027, "step": 80760 }, { "epoch": 0.6681556851553129, "grad_norm": 1850.9075927734375, "learning_rate": 3.245908534586602e-06, "loss": 87.6312, "step": 80770 }, { "epoch": 0.6682384084046822, "grad_norm": 984.0964965820312, "learning_rate": 3.2445878752364298e-06, "loss": 94.8259, "step": 80780 }, { "epoch": 0.6683211316540514, "grad_norm": 605.5479736328125, "learning_rate": 3.2432673555563433e-06, "loss": 113.5487, "step": 80790 }, { "epoch": 0.6684038549034206, "grad_norm": 800.7566528320312, "learning_rate": 3.2419469756514116e-06, "loss": 99.107, "step": 80800 }, { "epoch": 0.6684865781527899, "grad_norm": 1075.5169677734375, "learning_rate": 3.2406267356266918e-06, "loss": 82.8865, "step": 80810 }, { "epoch": 0.6685693014021591, "grad_norm": 1107.46240234375, "learning_rate": 3.2393066355872264e-06, "loss": 75.885, "step": 80820 }, { "epoch": 0.6686520246515283, "grad_norm": 1359.361083984375, "learning_rate": 3.237986675638052e-06, "loss": 115.8543, "step": 80830 }, { "epoch": 0.6687347479008976, "grad_norm": 1057.6058349609375, "learning_rate": 3.236666855884192e-06, "loss": 75.9444, "step": 80840 }, { "epoch": 0.6688174711502668, "grad_norm": 1190.32421875, "learning_rate": 3.2353471764306567e-06, "loss": 97.1294, "step": 80850 }, { "epoch": 0.668900194399636, "grad_norm": 431.67431640625, "learning_rate": 3.234027637382447e-06, "loss": 78.7077, "step": 80860 }, { "epoch": 0.6689829176490053, "grad_norm": 1030.784912109375, "learning_rate": 3.2327082388445545e-06, "loss": 123.1514, "step": 80870 }, { "epoch": 0.6690656408983745, "grad_norm": 712.6047973632812, "learning_rate": 3.2313889809219568e-06, "loss": 93.2329, "step": 80880 }, { "epoch": 0.6691483641477437, "grad_norm": 793.4395141601562, "learning_rate": 3.2300698637196217e-06, "loss": 88.246, "step": 80890 }, { "epoch": 0.669231087397113, "grad_norm": 594.5629272460938, "learning_rate": 3.2287508873425043e-06, "loss": 66.3094, "step": 80900 }, { "epoch": 0.6693138106464822, "grad_norm": 589.426513671875, "learning_rate": 3.22743205189555e-06, "loss": 72.8444, "step": 80910 }, { "epoch": 0.6693965338958514, "grad_norm": 545.673095703125, "learning_rate": 3.2261133574836918e-06, "loss": 89.7926, "step": 80920 }, { "epoch": 0.6694792571452207, "grad_norm": 809.666748046875, "learning_rate": 3.2247948042118525e-06, "loss": 81.8926, "step": 80930 }, { "epoch": 0.6695619803945899, "grad_norm": 1300.5181884765625, "learning_rate": 3.223476392184944e-06, "loss": 93.8708, "step": 80940 }, { "epoch": 0.6696447036439591, "grad_norm": 767.7609252929688, "learning_rate": 3.2221581215078656e-06, "loss": 86.6903, "step": 80950 }, { "epoch": 0.6697274268933283, "grad_norm": 568.2315063476562, "learning_rate": 3.2208399922855055e-06, "loss": 88.5472, "step": 80960 }, { "epoch": 0.6698101501426976, "grad_norm": 971.0084228515625, "learning_rate": 3.2195220046227425e-06, "loss": 104.4163, "step": 80970 }, { "epoch": 0.6698928733920668, "grad_norm": 955.1143188476562, "learning_rate": 3.218204158624445e-06, "loss": 71.1244, "step": 80980 }, { "epoch": 0.669975596641436, "grad_norm": 953.3546142578125, "learning_rate": 3.216886454395463e-06, "loss": 89.6237, "step": 80990 }, { "epoch": 0.6700583198908053, "grad_norm": 684.8885498046875, "learning_rate": 3.2155688920406415e-06, "loss": 99.5519, "step": 81000 }, { "epoch": 0.6701410431401745, "grad_norm": 1077.44677734375, "learning_rate": 3.2142514716648143e-06, "loss": 82.6027, "step": 81010 }, { "epoch": 0.6702237663895437, "grad_norm": 455.7202453613281, "learning_rate": 3.212934193372803e-06, "loss": 94.583, "step": 81020 }, { "epoch": 0.670306489638913, "grad_norm": 445.9930419921875, "learning_rate": 3.2116170572694156e-06, "loss": 94.4956, "step": 81030 }, { "epoch": 0.6703892128882822, "grad_norm": 668.4183349609375, "learning_rate": 3.2103000634594518e-06, "loss": 96.0746, "step": 81040 }, { "epoch": 0.6704719361376514, "grad_norm": 1390.123291015625, "learning_rate": 3.2089832120476983e-06, "loss": 89.5325, "step": 81050 }, { "epoch": 0.6705546593870207, "grad_norm": 847.5530395507812, "learning_rate": 3.2076665031389294e-06, "loss": 92.6629, "step": 81060 }, { "epoch": 0.67063738263639, "grad_norm": 1006.8488159179688, "learning_rate": 3.2063499368379146e-06, "loss": 84.7229, "step": 81070 }, { "epoch": 0.6707201058857591, "grad_norm": 802.8297119140625, "learning_rate": 3.2050335132494014e-06, "loss": 91.7173, "step": 81080 }, { "epoch": 0.6708028291351285, "grad_norm": 806.7569580078125, "learning_rate": 3.203717232478133e-06, "loss": 95.0556, "step": 81090 }, { "epoch": 0.6708855523844977, "grad_norm": 1519.580810546875, "learning_rate": 3.2024010946288415e-06, "loss": 93.1881, "step": 81100 }, { "epoch": 0.6709682756338669, "grad_norm": 539.17919921875, "learning_rate": 3.201085099806245e-06, "loss": 98.1286, "step": 81110 }, { "epoch": 0.6710509988832362, "grad_norm": 1128.193359375, "learning_rate": 3.199769248115051e-06, "loss": 89.5094, "step": 81120 }, { "epoch": 0.6711337221326054, "grad_norm": 1057.2535400390625, "learning_rate": 3.1984535396599565e-06, "loss": 103.7013, "step": 81130 }, { "epoch": 0.6712164453819746, "grad_norm": 699.2178955078125, "learning_rate": 3.1971379745456452e-06, "loss": 88.7541, "step": 81140 }, { "epoch": 0.6712991686313439, "grad_norm": 515.211669921875, "learning_rate": 3.1958225528767918e-06, "loss": 85.0595, "step": 81150 }, { "epoch": 0.6713818918807131, "grad_norm": 1344.10595703125, "learning_rate": 3.1945072747580585e-06, "loss": 89.7821, "step": 81160 }, { "epoch": 0.6714646151300823, "grad_norm": 1213.0770263671875, "learning_rate": 3.1931921402940946e-06, "loss": 73.3116, "step": 81170 }, { "epoch": 0.6715473383794516, "grad_norm": 654.7890625, "learning_rate": 3.1918771495895395e-06, "loss": 78.7428, "step": 81180 }, { "epoch": 0.6716300616288208, "grad_norm": 1164.929443359375, "learning_rate": 3.1905623027490205e-06, "loss": 84.9769, "step": 81190 }, { "epoch": 0.67171278487819, "grad_norm": 1309.23779296875, "learning_rate": 3.1892475998771567e-06, "loss": 81.4625, "step": 81200 }, { "epoch": 0.6717955081275593, "grad_norm": 654.0279541015625, "learning_rate": 3.1879330410785503e-06, "loss": 71.7905, "step": 81210 }, { "epoch": 0.6718782313769285, "grad_norm": 503.28863525390625, "learning_rate": 3.186618626457796e-06, "loss": 82.309, "step": 81220 }, { "epoch": 0.6719609546262977, "grad_norm": 951.95654296875, "learning_rate": 3.1853043561194748e-06, "loss": 88.6264, "step": 81230 }, { "epoch": 0.672043677875667, "grad_norm": 778.96484375, "learning_rate": 3.183990230168159e-06, "loss": 87.3521, "step": 81240 }, { "epoch": 0.6721264011250362, "grad_norm": 1273.863525390625, "learning_rate": 3.1826762487084053e-06, "loss": 96.637, "step": 81250 }, { "epoch": 0.6722091243744054, "grad_norm": 951.4760131835938, "learning_rate": 3.1813624118447615e-06, "loss": 77.3764, "step": 81260 }, { "epoch": 0.6722918476237747, "grad_norm": 613.3677978515625, "learning_rate": 3.180048719681765e-06, "loss": 81.409, "step": 81270 }, { "epoch": 0.6723745708731439, "grad_norm": 1063.3297119140625, "learning_rate": 3.178735172323939e-06, "loss": 68.0863, "step": 81280 }, { "epoch": 0.6724572941225131, "grad_norm": 799.1066284179688, "learning_rate": 3.177421769875796e-06, "loss": 90.396, "step": 81290 }, { "epoch": 0.6725400173718824, "grad_norm": 1305.4615478515625, "learning_rate": 3.176108512441839e-06, "loss": 110.8276, "step": 81300 }, { "epoch": 0.6726227406212516, "grad_norm": 630.9846801757812, "learning_rate": 3.174795400126557e-06, "loss": 84.7067, "step": 81310 }, { "epoch": 0.6727054638706208, "grad_norm": 2631.676025390625, "learning_rate": 3.173482433034429e-06, "loss": 118.5525, "step": 81320 }, { "epoch": 0.6727881871199901, "grad_norm": 1203.627197265625, "learning_rate": 3.1721696112699217e-06, "loss": 102.9443, "step": 81330 }, { "epoch": 0.6728709103693593, "grad_norm": 1666.17236328125, "learning_rate": 3.1708569349374896e-06, "loss": 100.6521, "step": 81340 }, { "epoch": 0.6729536336187285, "grad_norm": 969.8807983398438, "learning_rate": 3.1695444041415757e-06, "loss": 88.7701, "step": 81350 }, { "epoch": 0.6730363568680978, "grad_norm": 1491.0135498046875, "learning_rate": 3.1682320189866133e-06, "loss": 87.3877, "step": 81360 }, { "epoch": 0.673119080117467, "grad_norm": 506.7419128417969, "learning_rate": 3.1669197795770225e-06, "loss": 107.3529, "step": 81370 }, { "epoch": 0.6732018033668362, "grad_norm": 839.7028198242188, "learning_rate": 3.165607686017212e-06, "loss": 62.6055, "step": 81380 }, { "epoch": 0.6732845266162055, "grad_norm": 372.7325134277344, "learning_rate": 3.164295738411578e-06, "loss": 79.6807, "step": 81390 }, { "epoch": 0.6733672498655747, "grad_norm": 940.877685546875, "learning_rate": 3.1629839368645087e-06, "loss": 109.5253, "step": 81400 }, { "epoch": 0.6734499731149439, "grad_norm": 571.5814208984375, "learning_rate": 3.161672281480379e-06, "loss": 101.1825, "step": 81410 }, { "epoch": 0.6735326963643132, "grad_norm": 1666.25634765625, "learning_rate": 3.1603607723635455e-06, "loss": 92.3682, "step": 81420 }, { "epoch": 0.6736154196136824, "grad_norm": 394.33026123046875, "learning_rate": 3.1590494096183643e-06, "loss": 68.1128, "step": 81430 }, { "epoch": 0.6736981428630516, "grad_norm": 943.1664428710938, "learning_rate": 3.1577381933491718e-06, "loss": 92.4026, "step": 81440 }, { "epoch": 0.6737808661124209, "grad_norm": 479.8966064453125, "learning_rate": 3.156427123660297e-06, "loss": 101.9956, "step": 81450 }, { "epoch": 0.6738635893617901, "grad_norm": 1042.3021240234375, "learning_rate": 3.1551162006560554e-06, "loss": 92.2319, "step": 81460 }, { "epoch": 0.6739463126111593, "grad_norm": 640.0420532226562, "learning_rate": 3.15380542444075e-06, "loss": 115.9655, "step": 81470 }, { "epoch": 0.6740290358605286, "grad_norm": 636.8284912109375, "learning_rate": 3.1524947951186746e-06, "loss": 80.7586, "step": 81480 }, { "epoch": 0.6741117591098978, "grad_norm": 886.6581420898438, "learning_rate": 3.1511843127941085e-06, "loss": 116.3692, "step": 81490 }, { "epoch": 0.674194482359267, "grad_norm": 881.8652954101562, "learning_rate": 3.149873977571324e-06, "loss": 81.8739, "step": 81500 }, { "epoch": 0.6742772056086364, "grad_norm": 663.4722900390625, "learning_rate": 3.148563789554575e-06, "loss": 74.8076, "step": 81510 }, { "epoch": 0.6743599288580056, "grad_norm": 1380.9359130859375, "learning_rate": 3.147253748848107e-06, "loss": 138.0078, "step": 81520 }, { "epoch": 0.6744426521073748, "grad_norm": 402.91241455078125, "learning_rate": 3.1459438555561565e-06, "loss": 111.0916, "step": 81530 }, { "epoch": 0.6745253753567441, "grad_norm": 256.97442626953125, "learning_rate": 3.1446341097829446e-06, "loss": 81.1193, "step": 81540 }, { "epoch": 0.6746080986061133, "grad_norm": 847.0762329101562, "learning_rate": 3.1433245116326812e-06, "loss": 80.5571, "step": 81550 }, { "epoch": 0.6746908218554825, "grad_norm": 387.09881591796875, "learning_rate": 3.1420150612095653e-06, "loss": 85.5534, "step": 81560 }, { "epoch": 0.6747735451048518, "grad_norm": 1107.9180908203125, "learning_rate": 3.140705758617784e-06, "loss": 92.0838, "step": 81570 }, { "epoch": 0.674856268354221, "grad_norm": 940.799560546875, "learning_rate": 3.139396603961512e-06, "loss": 95.1766, "step": 81580 }, { "epoch": 0.6749389916035902, "grad_norm": 753.7284545898438, "learning_rate": 3.1380875973449155e-06, "loss": 79.7191, "step": 81590 }, { "epoch": 0.6750217148529595, "grad_norm": 1136.5980224609375, "learning_rate": 3.1367787388721427e-06, "loss": 69.0357, "step": 81600 }, { "epoch": 0.6751044381023287, "grad_norm": 724.4802856445312, "learning_rate": 3.135470028647334e-06, "loss": 87.2878, "step": 81610 }, { "epoch": 0.6751871613516979, "grad_norm": 742.6774291992188, "learning_rate": 3.134161466774617e-06, "loss": 72.0161, "step": 81620 }, { "epoch": 0.6752698846010672, "grad_norm": 855.7781372070312, "learning_rate": 3.1328530533581102e-06, "loss": 76.9828, "step": 81630 }, { "epoch": 0.6753526078504364, "grad_norm": 848.9146118164062, "learning_rate": 3.131544788501917e-06, "loss": 57.4658, "step": 81640 }, { "epoch": 0.6754353310998056, "grad_norm": 800.960205078125, "learning_rate": 3.1302366723101294e-06, "loss": 94.8087, "step": 81650 }, { "epoch": 0.6755180543491749, "grad_norm": 1037.375, "learning_rate": 3.12892870488683e-06, "loss": 102.3649, "step": 81660 }, { "epoch": 0.6756007775985441, "grad_norm": 530.1427001953125, "learning_rate": 3.1276208863360862e-06, "loss": 79.7667, "step": 81670 }, { "epoch": 0.6756835008479133, "grad_norm": 1380.489501953125, "learning_rate": 3.126313216761955e-06, "loss": 76.0104, "step": 81680 }, { "epoch": 0.6757662240972825, "grad_norm": 503.63250732421875, "learning_rate": 3.125005696268482e-06, "loss": 117.2083, "step": 81690 }, { "epoch": 0.6758489473466518, "grad_norm": 867.2805786132812, "learning_rate": 3.1236983249597007e-06, "loss": 107.8847, "step": 81700 }, { "epoch": 0.675931670596021, "grad_norm": 753.2324829101562, "learning_rate": 3.1223911029396324e-06, "loss": 78.1021, "step": 81710 }, { "epoch": 0.6760143938453902, "grad_norm": 1229.90673828125, "learning_rate": 3.121084030312286e-06, "loss": 83.4004, "step": 81720 }, { "epoch": 0.6760971170947595, "grad_norm": 641.0833740234375, "learning_rate": 3.1197771071816617e-06, "loss": 97.8247, "step": 81730 }, { "epoch": 0.6761798403441287, "grad_norm": 761.6908569335938, "learning_rate": 3.118470333651744e-06, "loss": 86.3788, "step": 81740 }, { "epoch": 0.6762625635934979, "grad_norm": 763.9179077148438, "learning_rate": 3.1171637098265063e-06, "loss": 107.4264, "step": 81750 }, { "epoch": 0.6763452868428672, "grad_norm": 1150.12890625, "learning_rate": 3.1158572358099127e-06, "loss": 118.2742, "step": 81760 }, { "epoch": 0.6764280100922364, "grad_norm": 1080.7073974609375, "learning_rate": 3.11455091170591e-06, "loss": 106.7814, "step": 81770 }, { "epoch": 0.6765107333416056, "grad_norm": 642.6464233398438, "learning_rate": 3.1132447376184383e-06, "loss": 99.3273, "step": 81780 }, { "epoch": 0.6765934565909749, "grad_norm": 808.6451416015625, "learning_rate": 3.1119387136514246e-06, "loss": 75.5224, "step": 81790 }, { "epoch": 0.6766761798403441, "grad_norm": 1050.21533203125, "learning_rate": 3.1106328399087814e-06, "loss": 79.97, "step": 81800 }, { "epoch": 0.6767589030897133, "grad_norm": 1466.090087890625, "learning_rate": 3.1093271164944116e-06, "loss": 89.5285, "step": 81810 }, { "epoch": 0.6768416263390826, "grad_norm": 1522.2515869140625, "learning_rate": 3.1080215435122072e-06, "loss": 116.0093, "step": 81820 }, { "epoch": 0.6769243495884518, "grad_norm": 1189.734375, "learning_rate": 3.106716121066046e-06, "loss": 100.7569, "step": 81830 }, { "epoch": 0.677007072837821, "grad_norm": 1023.820556640625, "learning_rate": 3.105410849259796e-06, "loss": 89.4581, "step": 81840 }, { "epoch": 0.6770897960871903, "grad_norm": 938.9853515625, "learning_rate": 3.104105728197306e-06, "loss": 85.5331, "step": 81850 }, { "epoch": 0.6771725193365595, "grad_norm": 752.8026123046875, "learning_rate": 3.1028007579824234e-06, "loss": 93.6751, "step": 81860 }, { "epoch": 0.6772552425859287, "grad_norm": 913.8092041015625, "learning_rate": 3.1014959387189774e-06, "loss": 68.4734, "step": 81870 }, { "epoch": 0.677337965835298, "grad_norm": 593.3901977539062, "learning_rate": 3.1001912705107874e-06, "loss": 82.684, "step": 81880 }, { "epoch": 0.6774206890846672, "grad_norm": 824.601318359375, "learning_rate": 3.0988867534616586e-06, "loss": 70.1524, "step": 81890 }, { "epoch": 0.6775034123340364, "grad_norm": 495.2556457519531, "learning_rate": 3.097582387675385e-06, "loss": 101.8257, "step": 81900 }, { "epoch": 0.6775861355834057, "grad_norm": 515.7427978515625, "learning_rate": 3.09627817325575e-06, "loss": 109.3507, "step": 81910 }, { "epoch": 0.677668858832775, "grad_norm": 457.17315673828125, "learning_rate": 3.0949741103065246e-06, "loss": 102.0512, "step": 81920 }, { "epoch": 0.6777515820821441, "grad_norm": 652.50537109375, "learning_rate": 3.093670198931469e-06, "loss": 70.5868, "step": 81930 }, { "epoch": 0.6778343053315135, "grad_norm": 1835.66748046875, "learning_rate": 3.0923664392343233e-06, "loss": 87.3482, "step": 81940 }, { "epoch": 0.6779170285808827, "grad_norm": 632.263671875, "learning_rate": 3.091062831318825e-06, "loss": 104.5366, "step": 81950 }, { "epoch": 0.6779997518302519, "grad_norm": 766.2409057617188, "learning_rate": 3.089759375288698e-06, "loss": 81.9699, "step": 81960 }, { "epoch": 0.6780824750796212, "grad_norm": 743.1511840820312, "learning_rate": 3.0884560712476497e-06, "loss": 110.5213, "step": 81970 }, { "epoch": 0.6781651983289904, "grad_norm": 979.716064453125, "learning_rate": 3.0871529192993794e-06, "loss": 85.1489, "step": 81980 }, { "epoch": 0.6782479215783596, "grad_norm": 713.2177124023438, "learning_rate": 3.085849919547572e-06, "loss": 92.1346, "step": 81990 }, { "epoch": 0.6783306448277289, "grad_norm": 1064.33837890625, "learning_rate": 3.0845470720959027e-06, "loss": 92.5992, "step": 82000 }, { "epoch": 0.6784133680770981, "grad_norm": 541.3108520507812, "learning_rate": 3.08324437704803e-06, "loss": 81.4435, "step": 82010 }, { "epoch": 0.6784960913264673, "grad_norm": 867.9805908203125, "learning_rate": 3.0819418345076095e-06, "loss": 86.1453, "step": 82020 }, { "epoch": 0.6785788145758366, "grad_norm": 1205.291259765625, "learning_rate": 3.080639444578272e-06, "loss": 77.0935, "step": 82030 }, { "epoch": 0.6786615378252058, "grad_norm": 821.0764770507812, "learning_rate": 3.0793372073636455e-06, "loss": 76.2802, "step": 82040 }, { "epoch": 0.678744261074575, "grad_norm": 1098.4530029296875, "learning_rate": 3.0780351229673423e-06, "loss": 99.049, "step": 82050 }, { "epoch": 0.6788269843239443, "grad_norm": 1023.1441650390625, "learning_rate": 3.0767331914929638e-06, "loss": 86.1433, "step": 82060 }, { "epoch": 0.6789097075733135, "grad_norm": 648.2615966796875, "learning_rate": 3.075431413044099e-06, "loss": 67.8278, "step": 82070 }, { "epoch": 0.6789924308226827, "grad_norm": 800.4606323242188, "learning_rate": 3.074129787724324e-06, "loss": 87.548, "step": 82080 }, { "epoch": 0.679075154072052, "grad_norm": 1863.6453857421875, "learning_rate": 3.072828315637203e-06, "loss": 109.8302, "step": 82090 }, { "epoch": 0.6791578773214212, "grad_norm": 1212.3822021484375, "learning_rate": 3.0715269968862898e-06, "loss": 122.7618, "step": 82100 }, { "epoch": 0.6792406005707904, "grad_norm": 599.944580078125, "learning_rate": 3.0702258315751223e-06, "loss": 69.2954, "step": 82110 }, { "epoch": 0.6793233238201597, "grad_norm": 1033.164794921875, "learning_rate": 3.0689248198072282e-06, "loss": 95.2678, "step": 82120 }, { "epoch": 0.6794060470695289, "grad_norm": 803.4859008789062, "learning_rate": 3.0676239616861234e-06, "loss": 93.7857, "step": 82130 }, { "epoch": 0.6794887703188981, "grad_norm": 790.937744140625, "learning_rate": 3.066323257315311e-06, "loss": 79.9522, "step": 82140 }, { "epoch": 0.6795714935682674, "grad_norm": 756.3161010742188, "learning_rate": 3.065022706798284e-06, "loss": 62.7251, "step": 82150 }, { "epoch": 0.6796542168176366, "grad_norm": 976.355224609375, "learning_rate": 3.06372231023852e-06, "loss": 115.6686, "step": 82160 }, { "epoch": 0.6797369400670058, "grad_norm": 841.8123168945312, "learning_rate": 3.0624220677394854e-06, "loss": 87.1789, "step": 82170 }, { "epoch": 0.6798196633163751, "grad_norm": 945.3234252929688, "learning_rate": 3.0611219794046344e-06, "loss": 93.9989, "step": 82180 }, { "epoch": 0.6799023865657443, "grad_norm": 1082.9246826171875, "learning_rate": 3.05982204533741e-06, "loss": 66.153, "step": 82190 }, { "epoch": 0.6799851098151135, "grad_norm": 707.8014526367188, "learning_rate": 3.0585222656412406e-06, "loss": 114.1069, "step": 82200 }, { "epoch": 0.6800678330644828, "grad_norm": 942.4901123046875, "learning_rate": 3.0572226404195436e-06, "loss": 63.9763, "step": 82210 }, { "epoch": 0.680150556313852, "grad_norm": 668.8705444335938, "learning_rate": 3.055923169775726e-06, "loss": 71.8056, "step": 82220 }, { "epoch": 0.6802332795632212, "grad_norm": 1010.2730712890625, "learning_rate": 3.054623853813179e-06, "loss": 116.1978, "step": 82230 }, { "epoch": 0.6803160028125905, "grad_norm": 636.508544921875, "learning_rate": 3.0533246926352834e-06, "loss": 88.7034, "step": 82240 }, { "epoch": 0.6803987260619597, "grad_norm": 1168.9595947265625, "learning_rate": 3.0520256863454077e-06, "loss": 94.9172, "step": 82250 }, { "epoch": 0.6804814493113289, "grad_norm": 727.0571899414062, "learning_rate": 3.05072683504691e-06, "loss": 85.0885, "step": 82260 }, { "epoch": 0.6805641725606982, "grad_norm": 1059.238037109375, "learning_rate": 3.049428138843133e-06, "loss": 87.404, "step": 82270 }, { "epoch": 0.6806468958100674, "grad_norm": 720.4179077148438, "learning_rate": 3.0481295978374037e-06, "loss": 84.2605, "step": 82280 }, { "epoch": 0.6807296190594366, "grad_norm": 1156.1129150390625, "learning_rate": 3.0468312121330464e-06, "loss": 99.4175, "step": 82290 }, { "epoch": 0.6808123423088059, "grad_norm": 987.896240234375, "learning_rate": 3.0455329818333652e-06, "loss": 66.1081, "step": 82300 }, { "epoch": 0.6808950655581751, "grad_norm": 878.2027587890625, "learning_rate": 3.044234907041655e-06, "loss": 98.8819, "step": 82310 }, { "epoch": 0.6809777888075443, "grad_norm": 1048.861083984375, "learning_rate": 3.0429369878611968e-06, "loss": 67.5868, "step": 82320 }, { "epoch": 0.6810605120569136, "grad_norm": 1312.079833984375, "learning_rate": 3.041639224395262e-06, "loss": 120.5818, "step": 82330 }, { "epoch": 0.6811432353062828, "grad_norm": 1217.2681884765625, "learning_rate": 3.0403416167471044e-06, "loss": 84.3008, "step": 82340 }, { "epoch": 0.681225958555652, "grad_norm": 1105.7882080078125, "learning_rate": 3.0390441650199727e-06, "loss": 80.4502, "step": 82350 }, { "epoch": 0.6813086818050214, "grad_norm": 722.1676635742188, "learning_rate": 3.0377468693170985e-06, "loss": 109.3831, "step": 82360 }, { "epoch": 0.6813914050543906, "grad_norm": 734.0096435546875, "learning_rate": 3.0364497297416973e-06, "loss": 80.3656, "step": 82370 }, { "epoch": 0.6814741283037598, "grad_norm": 392.9991760253906, "learning_rate": 3.035152746396981e-06, "loss": 93.2643, "step": 82380 }, { "epoch": 0.6815568515531291, "grad_norm": 724.57177734375, "learning_rate": 3.0338559193861434e-06, "loss": 106.8783, "step": 82390 }, { "epoch": 0.6816395748024983, "grad_norm": 511.555419921875, "learning_rate": 3.032559248812367e-06, "loss": 118.9703, "step": 82400 }, { "epoch": 0.6817222980518675, "grad_norm": 378.6773376464844, "learning_rate": 3.0312627347788208e-06, "loss": 82.5903, "step": 82410 }, { "epoch": 0.6818050213012367, "grad_norm": 438.7552490234375, "learning_rate": 3.0299663773886646e-06, "loss": 73.4946, "step": 82420 }, { "epoch": 0.681887744550606, "grad_norm": 1227.794921875, "learning_rate": 3.0286701767450423e-06, "loss": 114.8965, "step": 82430 }, { "epoch": 0.6819704677999752, "grad_norm": 591.5338745117188, "learning_rate": 3.0273741329510852e-06, "loss": 116.4364, "step": 82440 }, { "epoch": 0.6820531910493444, "grad_norm": 852.6351928710938, "learning_rate": 3.0260782461099192e-06, "loss": 71.9436, "step": 82450 }, { "epoch": 0.6821359142987137, "grad_norm": 758.0744018554688, "learning_rate": 3.024782516324645e-06, "loss": 82.7659, "step": 82460 }, { "epoch": 0.6822186375480829, "grad_norm": 1753.00048828125, "learning_rate": 3.0234869436983606e-06, "loss": 109.4363, "step": 82470 }, { "epoch": 0.6823013607974521, "grad_norm": 1189.2587890625, "learning_rate": 3.02219152833415e-06, "loss": 96.6973, "step": 82480 }, { "epoch": 0.6823840840468214, "grad_norm": 894.1843872070312, "learning_rate": 3.0208962703350832e-06, "loss": 86.2049, "step": 82490 }, { "epoch": 0.6824668072961906, "grad_norm": 675.1846313476562, "learning_rate": 3.019601169804216e-06, "loss": 73.5452, "step": 82500 }, { "epoch": 0.6825495305455598, "grad_norm": 733.6863403320312, "learning_rate": 3.0183062268445964e-06, "loss": 96.7175, "step": 82510 }, { "epoch": 0.6826322537949291, "grad_norm": 953.9824829101562, "learning_rate": 3.0170114415592543e-06, "loss": 95.8179, "step": 82520 }, { "epoch": 0.6827149770442983, "grad_norm": 776.6597290039062, "learning_rate": 3.015716814051213e-06, "loss": 75.2235, "step": 82530 }, { "epoch": 0.6827977002936675, "grad_norm": 741.5625610351562, "learning_rate": 3.0144223444234767e-06, "loss": 90.4091, "step": 82540 }, { "epoch": 0.6828804235430368, "grad_norm": 1144.602783203125, "learning_rate": 3.0131280327790412e-06, "loss": 74.8416, "step": 82550 }, { "epoch": 0.682963146792406, "grad_norm": 1054.5020751953125, "learning_rate": 3.0118338792208912e-06, "loss": 93.5738, "step": 82560 }, { "epoch": 0.6830458700417752, "grad_norm": 1366.202392578125, "learning_rate": 3.010539883851993e-06, "loss": 99.257, "step": 82570 }, { "epoch": 0.6831285932911445, "grad_norm": 303.204345703125, "learning_rate": 3.009246046775307e-06, "loss": 79.5888, "step": 82580 }, { "epoch": 0.6832113165405137, "grad_norm": 1691.6275634765625, "learning_rate": 3.0079523680937766e-06, "loss": 130.3326, "step": 82590 }, { "epoch": 0.6832940397898829, "grad_norm": 1158.856689453125, "learning_rate": 3.006658847910334e-06, "loss": 113.3093, "step": 82600 }, { "epoch": 0.6833767630392522, "grad_norm": 804.7373046875, "learning_rate": 3.005365486327899e-06, "loss": 85.9386, "step": 82610 }, { "epoch": 0.6834594862886214, "grad_norm": 961.7406005859375, "learning_rate": 3.004072283449379e-06, "loss": 88.6196, "step": 82620 }, { "epoch": 0.6835422095379906, "grad_norm": 764.2142333984375, "learning_rate": 3.0027792393776666e-06, "loss": 89.8795, "step": 82630 }, { "epoch": 0.6836249327873599, "grad_norm": 751.17822265625, "learning_rate": 3.001486354215644e-06, "loss": 70.7893, "step": 82640 }, { "epoch": 0.6837076560367291, "grad_norm": 1221.4046630859375, "learning_rate": 3.0001936280661794e-06, "loss": 80.4374, "step": 82650 }, { "epoch": 0.6837903792860983, "grad_norm": 748.9243774414062, "learning_rate": 2.998901061032131e-06, "loss": 115.65, "step": 82660 }, { "epoch": 0.6838731025354676, "grad_norm": 847.3457641601562, "learning_rate": 2.9976086532163397e-06, "loss": 88.8179, "step": 82670 }, { "epoch": 0.6839558257848368, "grad_norm": 363.82830810546875, "learning_rate": 2.9963164047216397e-06, "loss": 113.4569, "step": 82680 }, { "epoch": 0.684038549034206, "grad_norm": 855.93212890625, "learning_rate": 2.9950243156508473e-06, "loss": 99.6168, "step": 82690 }, { "epoch": 0.6841212722835753, "grad_norm": 877.383544921875, "learning_rate": 2.9937323861067695e-06, "loss": 107.2685, "step": 82700 }, { "epoch": 0.6842039955329445, "grad_norm": 737.8973388671875, "learning_rate": 2.992440616192197e-06, "loss": 73.6583, "step": 82710 }, { "epoch": 0.6842867187823137, "grad_norm": 878.8938598632812, "learning_rate": 2.9911490060099117e-06, "loss": 110.4591, "step": 82720 }, { "epoch": 0.684369442031683, "grad_norm": 1008.2698364257812, "learning_rate": 2.9898575556626807e-06, "loss": 84.5192, "step": 82730 }, { "epoch": 0.6844521652810522, "grad_norm": 942.9556884765625, "learning_rate": 2.9885662652532586e-06, "loss": 101.3532, "step": 82740 }, { "epoch": 0.6845348885304214, "grad_norm": 903.5498657226562, "learning_rate": 2.9872751348843875e-06, "loss": 87.6757, "step": 82750 }, { "epoch": 0.6846176117797907, "grad_norm": 1304.6390380859375, "learning_rate": 2.985984164658796e-06, "loss": 78.2163, "step": 82760 }, { "epoch": 0.68470033502916, "grad_norm": 691.3413696289062, "learning_rate": 2.9846933546792012e-06, "loss": 79.0234, "step": 82770 }, { "epoch": 0.6847830582785291, "grad_norm": 488.5516662597656, "learning_rate": 2.9834027050483085e-06, "loss": 106.5184, "step": 82780 }, { "epoch": 0.6848657815278985, "grad_norm": 1274.3031005859375, "learning_rate": 2.9821122158688086e-06, "loss": 75.8053, "step": 82790 }, { "epoch": 0.6849485047772677, "grad_norm": 719.878173828125, "learning_rate": 2.980821887243377e-06, "loss": 88.5654, "step": 82800 }, { "epoch": 0.6850312280266369, "grad_norm": 974.0200805664062, "learning_rate": 2.979531719274681e-06, "loss": 79.3588, "step": 82810 }, { "epoch": 0.6851139512760062, "grad_norm": 875.3156127929688, "learning_rate": 2.978241712065374e-06, "loss": 97.1635, "step": 82820 }, { "epoch": 0.6851966745253754, "grad_norm": 1160.015869140625, "learning_rate": 2.9769518657180953e-06, "loss": 93.5529, "step": 82830 }, { "epoch": 0.6852793977747446, "grad_norm": 419.8360290527344, "learning_rate": 2.9756621803354722e-06, "loss": 74.9933, "step": 82840 }, { "epoch": 0.6853621210241139, "grad_norm": 822.18505859375, "learning_rate": 2.9743726560201185e-06, "loss": 147.5206, "step": 82850 }, { "epoch": 0.6854448442734831, "grad_norm": 1076.56787109375, "learning_rate": 2.9730832928746355e-06, "loss": 89.3311, "step": 82860 }, { "epoch": 0.6855275675228523, "grad_norm": 563.077392578125, "learning_rate": 2.9717940910016135e-06, "loss": 94.1015, "step": 82870 }, { "epoch": 0.6856102907722216, "grad_norm": 1070.950927734375, "learning_rate": 2.9705050505036294e-06, "loss": 75.7543, "step": 82880 }, { "epoch": 0.6856930140215908, "grad_norm": 563.2598876953125, "learning_rate": 2.9692161714832422e-06, "loss": 81.8349, "step": 82890 }, { "epoch": 0.68577573727096, "grad_norm": 779.0994262695312, "learning_rate": 2.9679274540430037e-06, "loss": 77.1619, "step": 82900 }, { "epoch": 0.6858584605203293, "grad_norm": 1350.3271484375, "learning_rate": 2.966638898285452e-06, "loss": 76.3261, "step": 82910 }, { "epoch": 0.6859411837696985, "grad_norm": 790.4927368164062, "learning_rate": 2.9653505043131125e-06, "loss": 89.7235, "step": 82920 }, { "epoch": 0.6860239070190677, "grad_norm": 549.5680541992188, "learning_rate": 2.9640622722284944e-06, "loss": 84.1619, "step": 82930 }, { "epoch": 0.686106630268437, "grad_norm": 602.5223388671875, "learning_rate": 2.962774202134098e-06, "loss": 115.692, "step": 82940 }, { "epoch": 0.6861893535178062, "grad_norm": 574.7587890625, "learning_rate": 2.961486294132409e-06, "loss": 103.2164, "step": 82950 }, { "epoch": 0.6862720767671754, "grad_norm": 1748.5323486328125, "learning_rate": 2.960198548325901e-06, "loss": 89.1274, "step": 82960 }, { "epoch": 0.6863548000165447, "grad_norm": 626.42041015625, "learning_rate": 2.958910964817032e-06, "loss": 85.8427, "step": 82970 }, { "epoch": 0.6864375232659139, "grad_norm": 692.4397583007812, "learning_rate": 2.9576235437082502e-06, "loss": 100.6521, "step": 82980 }, { "epoch": 0.6865202465152831, "grad_norm": 587.5997314453125, "learning_rate": 2.9563362851019893e-06, "loss": 65.4427, "step": 82990 }, { "epoch": 0.6866029697646524, "grad_norm": 582.0680541992188, "learning_rate": 2.9550491891006704e-06, "loss": 82.0156, "step": 83000 }, { "epoch": 0.6866856930140216, "grad_norm": 1114.3927001953125, "learning_rate": 2.9537622558067036e-06, "loss": 87.1522, "step": 83010 }, { "epoch": 0.6867684162633908, "grad_norm": 487.21307373046875, "learning_rate": 2.9524754853224837e-06, "loss": 94.6792, "step": 83020 }, { "epoch": 0.6868511395127601, "grad_norm": 713.6537475585938, "learning_rate": 2.9511888777503916e-06, "loss": 67.1539, "step": 83030 }, { "epoch": 0.6869338627621293, "grad_norm": 840.3438110351562, "learning_rate": 2.949902433192798e-06, "loss": 84.9945, "step": 83040 }, { "epoch": 0.6870165860114985, "grad_norm": 1528.6619873046875, "learning_rate": 2.94861615175206e-06, "loss": 77.7083, "step": 83050 }, { "epoch": 0.6870993092608678, "grad_norm": 1220.947998046875, "learning_rate": 2.9473300335305193e-06, "loss": 89.6249, "step": 83060 }, { "epoch": 0.687182032510237, "grad_norm": 588.5947875976562, "learning_rate": 2.946044078630508e-06, "loss": 85.9502, "step": 83070 }, { "epoch": 0.6872647557596062, "grad_norm": 1005.1710815429688, "learning_rate": 2.9447582871543423e-06, "loss": 85.558, "step": 83080 }, { "epoch": 0.6873474790089755, "grad_norm": 658.07177734375, "learning_rate": 2.9434726592043263e-06, "loss": 92.5939, "step": 83090 }, { "epoch": 0.6874302022583447, "grad_norm": 996.118896484375, "learning_rate": 2.942187194882754e-06, "loss": 109.5573, "step": 83100 }, { "epoch": 0.6875129255077139, "grad_norm": 852.5332641601562, "learning_rate": 2.940901894291902e-06, "loss": 113.4957, "step": 83110 }, { "epoch": 0.6875956487570832, "grad_norm": 6915.46484375, "learning_rate": 2.939616757534037e-06, "loss": 119.719, "step": 83120 }, { "epoch": 0.6876783720064524, "grad_norm": 852.2628173828125, "learning_rate": 2.938331784711411e-06, "loss": 89.6527, "step": 83130 }, { "epoch": 0.6877610952558216, "grad_norm": 790.9977416992188, "learning_rate": 2.937046975926262e-06, "loss": 83.5397, "step": 83140 }, { "epoch": 0.6878438185051908, "grad_norm": 721.8024291992188, "learning_rate": 2.9357623312808183e-06, "loss": 67.4693, "step": 83150 }, { "epoch": 0.6879265417545601, "grad_norm": 525.3212280273438, "learning_rate": 2.934477850877292e-06, "loss": 100.1755, "step": 83160 }, { "epoch": 0.6880092650039293, "grad_norm": 536.9306640625, "learning_rate": 2.9331935348178838e-06, "loss": 91.7662, "step": 83170 }, { "epoch": 0.6880919882532985, "grad_norm": 924.3001098632812, "learning_rate": 2.931909383204781e-06, "loss": 77.1973, "step": 83180 }, { "epoch": 0.6881747115026678, "grad_norm": 688.9435424804688, "learning_rate": 2.9306253961401553e-06, "loss": 79.3377, "step": 83190 }, { "epoch": 0.688257434752037, "grad_norm": 1393.37451171875, "learning_rate": 2.929341573726171e-06, "loss": 111.1658, "step": 83200 }, { "epoch": 0.6883401580014062, "grad_norm": 524.9993896484375, "learning_rate": 2.928057916064975e-06, "loss": 68.5311, "step": 83210 }, { "epoch": 0.6884228812507756, "grad_norm": 1237.0621337890625, "learning_rate": 2.9267744232587035e-06, "loss": 102.3083, "step": 83220 }, { "epoch": 0.6885056045001448, "grad_norm": 620.7877807617188, "learning_rate": 2.925491095409473e-06, "loss": 107.3997, "step": 83230 }, { "epoch": 0.688588327749514, "grad_norm": 1383.7750244140625, "learning_rate": 2.924207932619397e-06, "loss": 91.8689, "step": 83240 }, { "epoch": 0.6886710509988833, "grad_norm": 775.3110961914062, "learning_rate": 2.9229249349905686e-06, "loss": 87.8382, "step": 83250 }, { "epoch": 0.6887537742482525, "grad_norm": 681.6326904296875, "learning_rate": 2.9216421026250707e-06, "loss": 70.0503, "step": 83260 }, { "epoch": 0.6888364974976217, "grad_norm": 786.73046875, "learning_rate": 2.9203594356249726e-06, "loss": 117.5731, "step": 83270 }, { "epoch": 0.688919220746991, "grad_norm": 575.7755737304688, "learning_rate": 2.919076934092329e-06, "loss": 64.0, "step": 83280 }, { "epoch": 0.6890019439963602, "grad_norm": 677.3495483398438, "learning_rate": 2.9177945981291843e-06, "loss": 69.7188, "step": 83290 }, { "epoch": 0.6890846672457294, "grad_norm": 704.7107543945312, "learning_rate": 2.916512427837568e-06, "loss": 112.342, "step": 83300 }, { "epoch": 0.6891673904950987, "grad_norm": 861.6221923828125, "learning_rate": 2.9152304233194974e-06, "loss": 63.6381, "step": 83310 }, { "epoch": 0.6892501137444679, "grad_norm": 722.7597045898438, "learning_rate": 2.9139485846769723e-06, "loss": 98.6105, "step": 83320 }, { "epoch": 0.6893328369938371, "grad_norm": 586.8096313476562, "learning_rate": 2.9126669120119846e-06, "loss": 112.3304, "step": 83330 }, { "epoch": 0.6894155602432064, "grad_norm": 1111.9354248046875, "learning_rate": 2.9113854054265112e-06, "loss": 104.5657, "step": 83340 }, { "epoch": 0.6894982834925756, "grad_norm": 1186.3795166015625, "learning_rate": 2.9101040650225155e-06, "loss": 112.6777, "step": 83350 }, { "epoch": 0.6895810067419448, "grad_norm": 627.06298828125, "learning_rate": 2.9088228909019455e-06, "loss": 91.1352, "step": 83360 }, { "epoch": 0.6896637299913141, "grad_norm": 530.3230590820312, "learning_rate": 2.9075418831667436e-06, "loss": 74.27, "step": 83370 }, { "epoch": 0.6897464532406833, "grad_norm": 1148.4014892578125, "learning_rate": 2.906261041918831e-06, "loss": 91.6355, "step": 83380 }, { "epoch": 0.6898291764900525, "grad_norm": 896.2343139648438, "learning_rate": 2.90498036726012e-06, "loss": 101.3476, "step": 83390 }, { "epoch": 0.6899118997394218, "grad_norm": 748.8731689453125, "learning_rate": 2.903699859292505e-06, "loss": 81.6133, "step": 83400 }, { "epoch": 0.689994622988791, "grad_norm": 1056.1656494140625, "learning_rate": 2.9024195181178704e-06, "loss": 90.9416, "step": 83410 }, { "epoch": 0.6900773462381602, "grad_norm": 1155.9085693359375, "learning_rate": 2.9011393438380884e-06, "loss": 80.1188, "step": 83420 }, { "epoch": 0.6901600694875295, "grad_norm": 807.7344970703125, "learning_rate": 2.8998593365550178e-06, "loss": 121.4385, "step": 83430 }, { "epoch": 0.6902427927368987, "grad_norm": 552.521484375, "learning_rate": 2.8985794963704992e-06, "loss": 92.7039, "step": 83440 }, { "epoch": 0.6903255159862679, "grad_norm": 1697.0496826171875, "learning_rate": 2.8972998233863657e-06, "loss": 101.2329, "step": 83450 }, { "epoch": 0.6904082392356372, "grad_norm": 824.8051147460938, "learning_rate": 2.8960203177044364e-06, "loss": 94.2846, "step": 83460 }, { "epoch": 0.6904909624850064, "grad_norm": 1690.2552490234375, "learning_rate": 2.8947409794265146e-06, "loss": 85.7355, "step": 83470 }, { "epoch": 0.6905736857343756, "grad_norm": 961.0975341796875, "learning_rate": 2.893461808654393e-06, "loss": 99.6032, "step": 83480 }, { "epoch": 0.6906564089837449, "grad_norm": 622.6866455078125, "learning_rate": 2.892182805489846e-06, "loss": 78.2167, "step": 83490 }, { "epoch": 0.6907391322331141, "grad_norm": 1131.5731201171875, "learning_rate": 2.8909039700346385e-06, "loss": 95.0546, "step": 83500 }, { "epoch": 0.6908218554824833, "grad_norm": 324.06011962890625, "learning_rate": 2.889625302390524e-06, "loss": 72.3154, "step": 83510 }, { "epoch": 0.6909045787318526, "grad_norm": 1070.0745849609375, "learning_rate": 2.8883468026592382e-06, "loss": 91.6812, "step": 83520 }, { "epoch": 0.6909873019812218, "grad_norm": 876.22607421875, "learning_rate": 2.8870684709425063e-06, "loss": 86.1727, "step": 83530 }, { "epoch": 0.691070025230591, "grad_norm": 1324.11376953125, "learning_rate": 2.885790307342039e-06, "loss": 97.0591, "step": 83540 }, { "epoch": 0.6911527484799603, "grad_norm": 1690.0306396484375, "learning_rate": 2.884512311959532e-06, "loss": 101.5719, "step": 83550 }, { "epoch": 0.6912354717293295, "grad_norm": 436.6270446777344, "learning_rate": 2.8832344848966758e-06, "loss": 64.3056, "step": 83560 }, { "epoch": 0.6913181949786987, "grad_norm": 1113.216552734375, "learning_rate": 2.8819568262551344e-06, "loss": 134.5069, "step": 83570 }, { "epoch": 0.691400918228068, "grad_norm": 676.7778930664062, "learning_rate": 2.8806793361365686e-06, "loss": 128.0765, "step": 83580 }, { "epoch": 0.6914836414774372, "grad_norm": 1021.91259765625, "learning_rate": 2.8794020146426217e-06, "loss": 95.5648, "step": 83590 }, { "epoch": 0.6915663647268064, "grad_norm": 963.874755859375, "learning_rate": 2.8781248618749235e-06, "loss": 85.2975, "step": 83600 }, { "epoch": 0.6916490879761757, "grad_norm": 819.314453125, "learning_rate": 2.8768478779350927e-06, "loss": 76.2316, "step": 83610 }, { "epoch": 0.691731811225545, "grad_norm": 1420.544921875, "learning_rate": 2.875571062924732e-06, "loss": 102.6076, "step": 83620 }, { "epoch": 0.6918145344749141, "grad_norm": 1988.164794921875, "learning_rate": 2.874294416945432e-06, "loss": 109.0461, "step": 83630 }, { "epoch": 0.6918972577242835, "grad_norm": 808.8279418945312, "learning_rate": 2.8730179400987697e-06, "loss": 102.7774, "step": 83640 }, { "epoch": 0.6919799809736527, "grad_norm": 1196.42626953125, "learning_rate": 2.871741632486308e-06, "loss": 86.4877, "step": 83650 }, { "epoch": 0.6920627042230219, "grad_norm": 940.0408935546875, "learning_rate": 2.8704654942095977e-06, "loss": 103.2302, "step": 83660 }, { "epoch": 0.6921454274723912, "grad_norm": 955.0111083984375, "learning_rate": 2.869189525370174e-06, "loss": 86.1693, "step": 83670 }, { "epoch": 0.6922281507217604, "grad_norm": 411.0567932128906, "learning_rate": 2.8679137260695614e-06, "loss": 82.702, "step": 83680 }, { "epoch": 0.6923108739711296, "grad_norm": 586.8140258789062, "learning_rate": 2.866638096409269e-06, "loss": 97.6693, "step": 83690 }, { "epoch": 0.6923935972204989, "grad_norm": 781.5844116210938, "learning_rate": 2.8653626364907918e-06, "loss": 52.9038, "step": 83700 }, { "epoch": 0.6924763204698681, "grad_norm": 1218.3243408203125, "learning_rate": 2.8640873464156127e-06, "loss": 77.371, "step": 83710 }, { "epoch": 0.6925590437192373, "grad_norm": 860.3048706054688, "learning_rate": 2.8628122262852015e-06, "loss": 78.9779, "step": 83720 }, { "epoch": 0.6926417669686066, "grad_norm": 655.4887084960938, "learning_rate": 2.861537276201013e-06, "loss": 95.3996, "step": 83730 }, { "epoch": 0.6927244902179758, "grad_norm": 611.69384765625, "learning_rate": 2.860262496264489e-06, "loss": 93.9206, "step": 83740 }, { "epoch": 0.692807213467345, "grad_norm": 1187.9385986328125, "learning_rate": 2.858987886577058e-06, "loss": 85.537, "step": 83750 }, { "epoch": 0.6928899367167143, "grad_norm": 594.444580078125, "learning_rate": 2.857713447240135e-06, "loss": 96.7337, "step": 83760 }, { "epoch": 0.6929726599660835, "grad_norm": 702.6067504882812, "learning_rate": 2.8564391783551214e-06, "loss": 96.4278, "step": 83770 }, { "epoch": 0.6930553832154527, "grad_norm": 799.9725341796875, "learning_rate": 2.855165080023405e-06, "loss": 85.3117, "step": 83780 }, { "epoch": 0.693138106464822, "grad_norm": 865.3021240234375, "learning_rate": 2.85389115234636e-06, "loss": 97.0047, "step": 83790 }, { "epoch": 0.6932208297141912, "grad_norm": 595.7553100585938, "learning_rate": 2.8526173954253458e-06, "loss": 87.2789, "step": 83800 }, { "epoch": 0.6933035529635604, "grad_norm": 1058.0035400390625, "learning_rate": 2.8513438093617107e-06, "loss": 119.3666, "step": 83810 }, { "epoch": 0.6933862762129297, "grad_norm": 1175.2041015625, "learning_rate": 2.8500703942567874e-06, "loss": 82.3834, "step": 83820 }, { "epoch": 0.6934689994622989, "grad_norm": 966.0508422851562, "learning_rate": 2.848797150211896e-06, "loss": 89.0491, "step": 83830 }, { "epoch": 0.6935517227116681, "grad_norm": 732.8645629882812, "learning_rate": 2.847524077328343e-06, "loss": 90.359, "step": 83840 }, { "epoch": 0.6936344459610374, "grad_norm": 575.0491943359375, "learning_rate": 2.8462511757074205e-06, "loss": 73.8018, "step": 83850 }, { "epoch": 0.6937171692104066, "grad_norm": 1305.1068115234375, "learning_rate": 2.844978445450408e-06, "loss": 90.5077, "step": 83860 }, { "epoch": 0.6937998924597758, "grad_norm": 1045.33935546875, "learning_rate": 2.8437058866585698e-06, "loss": 114.761, "step": 83870 }, { "epoch": 0.693882615709145, "grad_norm": 688.1589965820312, "learning_rate": 2.842433499433158e-06, "loss": 91.853, "step": 83880 }, { "epoch": 0.6939653389585143, "grad_norm": 436.8230285644531, "learning_rate": 2.841161283875411e-06, "loss": 106.294, "step": 83890 }, { "epoch": 0.6940480622078835, "grad_norm": 906.2509765625, "learning_rate": 2.8398892400865537e-06, "loss": 74.6637, "step": 83900 }, { "epoch": 0.6941307854572527, "grad_norm": 450.2344665527344, "learning_rate": 2.838617368167797e-06, "loss": 143.2664, "step": 83910 }, { "epoch": 0.694213508706622, "grad_norm": 952.6104125976562, "learning_rate": 2.837345668220333e-06, "loss": 69.9202, "step": 83920 }, { "epoch": 0.6942962319559912, "grad_norm": 755.5713500976562, "learning_rate": 2.836074140345352e-06, "loss": 82.3544, "step": 83930 }, { "epoch": 0.6943789552053604, "grad_norm": 659.2360229492188, "learning_rate": 2.834802784644019e-06, "loss": 92.3773, "step": 83940 }, { "epoch": 0.6944616784547297, "grad_norm": 567.8734130859375, "learning_rate": 2.8335316012174925e-06, "loss": 102.9247, "step": 83950 }, { "epoch": 0.6945444017040989, "grad_norm": 434.0081787109375, "learning_rate": 2.8322605901669133e-06, "loss": 73.0931, "step": 83960 }, { "epoch": 0.6946271249534681, "grad_norm": 453.88153076171875, "learning_rate": 2.8309897515934104e-06, "loss": 98.706, "step": 83970 }, { "epoch": 0.6947098482028374, "grad_norm": 726.1422119140625, "learning_rate": 2.8297190855980987e-06, "loss": 87.1649, "step": 83980 }, { "epoch": 0.6947925714522066, "grad_norm": 784.1115112304688, "learning_rate": 2.8284485922820814e-06, "loss": 74.6825, "step": 83990 }, { "epoch": 0.6948752947015758, "grad_norm": 674.6708984375, "learning_rate": 2.8271782717464413e-06, "loss": 59.7278, "step": 84000 }, { "epoch": 0.6949580179509451, "grad_norm": 1085.0732421875, "learning_rate": 2.8259081240922522e-06, "loss": 110.7847, "step": 84010 }, { "epoch": 0.6950407412003143, "grad_norm": 902.2394409179688, "learning_rate": 2.8246381494205775e-06, "loss": 72.2629, "step": 84020 }, { "epoch": 0.6951234644496835, "grad_norm": 1027.4735107421875, "learning_rate": 2.8233683478324627e-06, "loss": 99.9426, "step": 84030 }, { "epoch": 0.6952061876990528, "grad_norm": 709.7374267578125, "learning_rate": 2.822098719428938e-06, "loss": 100.229, "step": 84040 }, { "epoch": 0.695288910948422, "grad_norm": 1114.0150146484375, "learning_rate": 2.8208292643110237e-06, "loss": 94.4212, "step": 84050 }, { "epoch": 0.6953716341977912, "grad_norm": 679.0440063476562, "learning_rate": 2.8195599825797233e-06, "loss": 76.9554, "step": 84060 }, { "epoch": 0.6954543574471606, "grad_norm": 947.027099609375, "learning_rate": 2.818290874336028e-06, "loss": 100.9757, "step": 84070 }, { "epoch": 0.6955370806965298, "grad_norm": 2001.5252685546875, "learning_rate": 2.817021939680918e-06, "loss": 95.2409, "step": 84080 }, { "epoch": 0.695619803945899, "grad_norm": 871.5374145507812, "learning_rate": 2.8157531787153515e-06, "loss": 94.9407, "step": 84090 }, { "epoch": 0.6957025271952683, "grad_norm": 1146.988037109375, "learning_rate": 2.8144845915402796e-06, "loss": 102.4395, "step": 84100 }, { "epoch": 0.6957852504446375, "grad_norm": 855.0643920898438, "learning_rate": 2.813216178256637e-06, "loss": 105.7368, "step": 84110 }, { "epoch": 0.6958679736940067, "grad_norm": 662.9407958984375, "learning_rate": 2.8119479389653492e-06, "loss": 76.1168, "step": 84120 }, { "epoch": 0.695950696943376, "grad_norm": 1097.759765625, "learning_rate": 2.8106798737673223e-06, "loss": 86.9396, "step": 84130 }, { "epoch": 0.6960334201927452, "grad_norm": 740.43896484375, "learning_rate": 2.8094119827634496e-06, "loss": 84.4411, "step": 84140 }, { "epoch": 0.6961161434421144, "grad_norm": 1305.01171875, "learning_rate": 2.8081442660546126e-06, "loss": 100.1945, "step": 84150 }, { "epoch": 0.6961988666914837, "grad_norm": 1327.839599609375, "learning_rate": 2.806876723741677e-06, "loss": 115.4466, "step": 84160 }, { "epoch": 0.6962815899408529, "grad_norm": 795.6221923828125, "learning_rate": 2.805609355925497e-06, "loss": 75.2351, "step": 84170 }, { "epoch": 0.6963643131902221, "grad_norm": 712.4464721679688, "learning_rate": 2.8043421627069077e-06, "loss": 70.8538, "step": 84180 }, { "epoch": 0.6964470364395914, "grad_norm": 838.209716796875, "learning_rate": 2.8030751441867364e-06, "loss": 86.4729, "step": 84190 }, { "epoch": 0.6965297596889606, "grad_norm": 584.37646484375, "learning_rate": 2.8018083004657924e-06, "loss": 112.4415, "step": 84200 }, { "epoch": 0.6966124829383298, "grad_norm": 881.0332641601562, "learning_rate": 2.800541631644873e-06, "loss": 145.887, "step": 84210 }, { "epoch": 0.6966952061876991, "grad_norm": 664.2578735351562, "learning_rate": 2.7992751378247627e-06, "loss": 124.8047, "step": 84220 }, { "epoch": 0.6967779294370683, "grad_norm": 929.6114501953125, "learning_rate": 2.79800881910623e-06, "loss": 95.0422, "step": 84230 }, { "epoch": 0.6968606526864375, "grad_norm": 777.854248046875, "learning_rate": 2.7967426755900293e-06, "loss": 84.8647, "step": 84240 }, { "epoch": 0.6969433759358068, "grad_norm": 749.425048828125, "learning_rate": 2.795476707376905e-06, "loss": 89.1832, "step": 84250 }, { "epoch": 0.697026099185176, "grad_norm": 844.8867797851562, "learning_rate": 2.79421091456758e-06, "loss": 75.1808, "step": 84260 }, { "epoch": 0.6971088224345452, "grad_norm": 1129.9508056640625, "learning_rate": 2.7929452972627685e-06, "loss": 108.7002, "step": 84270 }, { "epoch": 0.6971915456839145, "grad_norm": 868.1597900390625, "learning_rate": 2.791679855563171e-06, "loss": 70.2446, "step": 84280 }, { "epoch": 0.6972742689332837, "grad_norm": 1031.17578125, "learning_rate": 2.790414589569473e-06, "loss": 93.3332, "step": 84290 }, { "epoch": 0.6973569921826529, "grad_norm": 544.4410400390625, "learning_rate": 2.789149499382345e-06, "loss": 78.5159, "step": 84300 }, { "epoch": 0.6974397154320222, "grad_norm": 861.5923461914062, "learning_rate": 2.7878845851024426e-06, "loss": 84.6842, "step": 84310 }, { "epoch": 0.6975224386813914, "grad_norm": 761.0271606445312, "learning_rate": 2.786619846830414e-06, "loss": 91.5659, "step": 84320 }, { "epoch": 0.6976051619307606, "grad_norm": 1823.0885009765625, "learning_rate": 2.7853552846668865e-06, "loss": 96.6269, "step": 84330 }, { "epoch": 0.6976878851801299, "grad_norm": 1062.7515869140625, "learning_rate": 2.784090898712476e-06, "loss": 87.3296, "step": 84340 }, { "epoch": 0.6977706084294991, "grad_norm": 582.9891967773438, "learning_rate": 2.7828266890677825e-06, "loss": 89.4428, "step": 84350 }, { "epoch": 0.6978533316788683, "grad_norm": 1207.8365478515625, "learning_rate": 2.781562655833393e-06, "loss": 109.0874, "step": 84360 }, { "epoch": 0.6979360549282376, "grad_norm": 583.7366943359375, "learning_rate": 2.7802987991098816e-06, "loss": 77.5733, "step": 84370 }, { "epoch": 0.6980187781776068, "grad_norm": 479.65802001953125, "learning_rate": 2.7790351189978083e-06, "loss": 83.5992, "step": 84380 }, { "epoch": 0.698101501426976, "grad_norm": 1373.9141845703125, "learning_rate": 2.777771615597717e-06, "loss": 124.8873, "step": 84390 }, { "epoch": 0.6981842246763453, "grad_norm": 1068.3160400390625, "learning_rate": 2.776508289010138e-06, "loss": 102.6386, "step": 84400 }, { "epoch": 0.6982669479257145, "grad_norm": 761.4998779296875, "learning_rate": 2.7752451393355916e-06, "loss": 103.4375, "step": 84410 }, { "epoch": 0.6983496711750837, "grad_norm": 594.727294921875, "learning_rate": 2.773982166674582e-06, "loss": 93.2319, "step": 84420 }, { "epoch": 0.698432394424453, "grad_norm": 516.1226806640625, "learning_rate": 2.772719371127593e-06, "loss": 117.761, "step": 84430 }, { "epoch": 0.6985151176738222, "grad_norm": 1146.329833984375, "learning_rate": 2.771456752795102e-06, "loss": 98.0758, "step": 84440 }, { "epoch": 0.6985978409231914, "grad_norm": 1272.3282470703125, "learning_rate": 2.7701943117775686e-06, "loss": 71.8762, "step": 84450 }, { "epoch": 0.6986805641725607, "grad_norm": 938.8295288085938, "learning_rate": 2.7689320481754414e-06, "loss": 78.6577, "step": 84460 }, { "epoch": 0.69876328742193, "grad_norm": 1171.89306640625, "learning_rate": 2.7676699620891514e-06, "loss": 65.5108, "step": 84470 }, { "epoch": 0.6988460106712991, "grad_norm": 1358.8653564453125, "learning_rate": 2.7664080536191178e-06, "loss": 85.8439, "step": 84480 }, { "epoch": 0.6989287339206685, "grad_norm": 1054.154541015625, "learning_rate": 2.7651463228657444e-06, "loss": 93.1995, "step": 84490 }, { "epoch": 0.6990114571700377, "grad_norm": 1459.2327880859375, "learning_rate": 2.7638847699294196e-06, "loss": 96.8761, "step": 84500 }, { "epoch": 0.6990941804194069, "grad_norm": 607.018310546875, "learning_rate": 2.7626233949105252e-06, "loss": 69.8457, "step": 84510 }, { "epoch": 0.6991769036687762, "grad_norm": 713.6427612304688, "learning_rate": 2.7613621979094173e-06, "loss": 61.2357, "step": 84520 }, { "epoch": 0.6992596269181454, "grad_norm": 1142.56201171875, "learning_rate": 2.7601011790264454e-06, "loss": 81.4103, "step": 84530 }, { "epoch": 0.6993423501675146, "grad_norm": 354.2678527832031, "learning_rate": 2.758840338361942e-06, "loss": 79.8373, "step": 84540 }, { "epoch": 0.6994250734168839, "grad_norm": 730.341552734375, "learning_rate": 2.7575796760162288e-06, "loss": 84.2706, "step": 84550 }, { "epoch": 0.6995077966662531, "grad_norm": 687.3909301757812, "learning_rate": 2.7563191920896084e-06, "loss": 79.1268, "step": 84560 }, { "epoch": 0.6995905199156223, "grad_norm": 771.8450927734375, "learning_rate": 2.755058886682373e-06, "loss": 86.2774, "step": 84570 }, { "epoch": 0.6996732431649916, "grad_norm": 826.2738037109375, "learning_rate": 2.753798759894799e-06, "loss": 96.8274, "step": 84580 }, { "epoch": 0.6997559664143608, "grad_norm": 755.7567749023438, "learning_rate": 2.7525388118271495e-06, "loss": 90.1103, "step": 84590 }, { "epoch": 0.69983868966373, "grad_norm": 1258.190185546875, "learning_rate": 2.751279042579672e-06, "loss": 95.2349, "step": 84600 }, { "epoch": 0.6999214129130992, "grad_norm": 915.1702270507812, "learning_rate": 2.7500194522526007e-06, "loss": 97.5897, "step": 84610 }, { "epoch": 0.7000041361624685, "grad_norm": 395.96044921875, "learning_rate": 2.748760040946156e-06, "loss": 71.4906, "step": 84620 }, { "epoch": 0.7000868594118377, "grad_norm": 1328.4857177734375, "learning_rate": 2.7475008087605428e-06, "loss": 88.9978, "step": 84630 }, { "epoch": 0.7001695826612069, "grad_norm": 1446.4951171875, "learning_rate": 2.746241755795952e-06, "loss": 129.5656, "step": 84640 }, { "epoch": 0.7002523059105762, "grad_norm": 316.505615234375, "learning_rate": 2.7449828821525624e-06, "loss": 87.3932, "step": 84650 }, { "epoch": 0.7003350291599454, "grad_norm": 738.2881469726562, "learning_rate": 2.7437241879305354e-06, "loss": 81.33, "step": 84660 }, { "epoch": 0.7004177524093146, "grad_norm": 449.9028625488281, "learning_rate": 2.7424656732300193e-06, "loss": 71.7711, "step": 84670 }, { "epoch": 0.7005004756586839, "grad_norm": 856.5056762695312, "learning_rate": 2.7412073381511495e-06, "loss": 66.1047, "step": 84680 }, { "epoch": 0.7005831989080531, "grad_norm": 1258.2718505859375, "learning_rate": 2.739949182794045e-06, "loss": 119.7207, "step": 84690 }, { "epoch": 0.7006659221574223, "grad_norm": 1023.8765869140625, "learning_rate": 2.7386912072588123e-06, "loss": 84.5958, "step": 84700 }, { "epoch": 0.7007486454067916, "grad_norm": 1277.8900146484375, "learning_rate": 2.737433411645542e-06, "loss": 85.9086, "step": 84710 }, { "epoch": 0.7008313686561608, "grad_norm": 739.3309326171875, "learning_rate": 2.7361757960543114e-06, "loss": 81.2444, "step": 84720 }, { "epoch": 0.70091409190553, "grad_norm": 902.0711669921875, "learning_rate": 2.7349183605851824e-06, "loss": 72.4432, "step": 84730 }, { "epoch": 0.7009968151548993, "grad_norm": 1022.815185546875, "learning_rate": 2.733661105338205e-06, "loss": 131.5625, "step": 84740 }, { "epoch": 0.7010795384042685, "grad_norm": 1463.1163330078125, "learning_rate": 2.7324040304134125e-06, "loss": 156.1384, "step": 84750 }, { "epoch": 0.7011622616536377, "grad_norm": 762.2933959960938, "learning_rate": 2.731147135910824e-06, "loss": 100.2397, "step": 84760 }, { "epoch": 0.701244984903007, "grad_norm": 1155.1771240234375, "learning_rate": 2.729890421930445e-06, "loss": 89.7689, "step": 84770 }, { "epoch": 0.7013277081523762, "grad_norm": 649.2015991210938, "learning_rate": 2.7286338885722674e-06, "loss": 105.2359, "step": 84780 }, { "epoch": 0.7014104314017454, "grad_norm": 776.761962890625, "learning_rate": 2.7273775359362665e-06, "loss": 78.5909, "step": 84790 }, { "epoch": 0.7014931546511147, "grad_norm": 1017.0020141601562, "learning_rate": 2.7261213641224056e-06, "loss": 87.9357, "step": 84800 }, { "epoch": 0.7015758779004839, "grad_norm": 754.7698974609375, "learning_rate": 2.724865373230632e-06, "loss": 105.561, "step": 84810 }, { "epoch": 0.7016586011498531, "grad_norm": 519.2734375, "learning_rate": 2.723609563360879e-06, "loss": 87.3953, "step": 84820 }, { "epoch": 0.7017413243992224, "grad_norm": 740.0153198242188, "learning_rate": 2.7223539346130655e-06, "loss": 105.0598, "step": 84830 }, { "epoch": 0.7018240476485916, "grad_norm": 761.6763305664062, "learning_rate": 2.7210984870870972e-06, "loss": 110.6421, "step": 84840 }, { "epoch": 0.7019067708979608, "grad_norm": 1044.949951171875, "learning_rate": 2.7198432208828653e-06, "loss": 116.031, "step": 84850 }, { "epoch": 0.7019894941473301, "grad_norm": 934.532958984375, "learning_rate": 2.7185881361002415e-06, "loss": 75.7842, "step": 84860 }, { "epoch": 0.7020722173966993, "grad_norm": 714.5556030273438, "learning_rate": 2.717333232839088e-06, "loss": 105.3236, "step": 84870 }, { "epoch": 0.7021549406460685, "grad_norm": 589.3107299804688, "learning_rate": 2.7160785111992546e-06, "loss": 82.1901, "step": 84880 }, { "epoch": 0.7022376638954378, "grad_norm": 1139.3077392578125, "learning_rate": 2.7148239712805725e-06, "loss": 90.0878, "step": 84890 }, { "epoch": 0.702320387144807, "grad_norm": 951.9703979492188, "learning_rate": 2.713569613182859e-06, "loss": 78.3111, "step": 84900 }, { "epoch": 0.7024031103941762, "grad_norm": 1005.8824462890625, "learning_rate": 2.7123154370059185e-06, "loss": 107.2209, "step": 84910 }, { "epoch": 0.7024858336435456, "grad_norm": 1006.9483642578125, "learning_rate": 2.7110614428495396e-06, "loss": 116.84, "step": 84920 }, { "epoch": 0.7025685568929148, "grad_norm": 607.9546508789062, "learning_rate": 2.709807630813497e-06, "loss": 106.0352, "step": 84930 }, { "epoch": 0.702651280142284, "grad_norm": 458.4266662597656, "learning_rate": 2.7085540009975526e-06, "loss": 78.4597, "step": 84940 }, { "epoch": 0.7027340033916533, "grad_norm": 1103.226318359375, "learning_rate": 2.707300553501448e-06, "loss": 70.445, "step": 84950 }, { "epoch": 0.7028167266410225, "grad_norm": 1002.1087036132812, "learning_rate": 2.7060472884249145e-06, "loss": 87.4876, "step": 84960 }, { "epoch": 0.7028994498903917, "grad_norm": 993.1524047851562, "learning_rate": 2.7047942058676717e-06, "loss": 82.637, "step": 84970 }, { "epoch": 0.702982173139761, "grad_norm": 1153.7645263671875, "learning_rate": 2.703541305929421e-06, "loss": 105.1839, "step": 84980 }, { "epoch": 0.7030648963891302, "grad_norm": 765.0949096679688, "learning_rate": 2.7022885887098492e-06, "loss": 69.9202, "step": 84990 }, { "epoch": 0.7031476196384994, "grad_norm": 930.6118774414062, "learning_rate": 2.701036054308629e-06, "loss": 78.9953, "step": 85000 }, { "epoch": 0.7032303428878687, "grad_norm": 728.7538452148438, "learning_rate": 2.699783702825419e-06, "loss": 86.7408, "step": 85010 }, { "epoch": 0.7033130661372379, "grad_norm": 731.0704345703125, "learning_rate": 2.698531534359864e-06, "loss": 94.9619, "step": 85020 }, { "epoch": 0.7033957893866071, "grad_norm": 795.151611328125, "learning_rate": 2.6972795490115944e-06, "loss": 78.0553, "step": 85030 }, { "epoch": 0.7034785126359764, "grad_norm": 806.3612060546875, "learning_rate": 2.6960277468802203e-06, "loss": 75.0848, "step": 85040 }, { "epoch": 0.7035612358853456, "grad_norm": 1016.3757934570312, "learning_rate": 2.694776128065345e-06, "loss": 70.555, "step": 85050 }, { "epoch": 0.7036439591347148, "grad_norm": 759.8416137695312, "learning_rate": 2.6935246926665513e-06, "loss": 75.158, "step": 85060 }, { "epoch": 0.7037266823840841, "grad_norm": 671.8663940429688, "learning_rate": 2.692273440783415e-06, "loss": 107.0158, "step": 85070 }, { "epoch": 0.7038094056334533, "grad_norm": 978.9486694335938, "learning_rate": 2.6910223725154903e-06, "loss": 105.8965, "step": 85080 }, { "epoch": 0.7038921288828225, "grad_norm": 1325.02294921875, "learning_rate": 2.6897714879623184e-06, "loss": 97.7983, "step": 85090 }, { "epoch": 0.7039748521321918, "grad_norm": 592.0951538085938, "learning_rate": 2.688520787223426e-06, "loss": 86.1239, "step": 85100 }, { "epoch": 0.704057575381561, "grad_norm": 648.145263671875, "learning_rate": 2.6872702703983287e-06, "loss": 81.3647, "step": 85110 }, { "epoch": 0.7041402986309302, "grad_norm": 702.9576416015625, "learning_rate": 2.6860199375865203e-06, "loss": 125.3935, "step": 85120 }, { "epoch": 0.7042230218802995, "grad_norm": 605.6353759765625, "learning_rate": 2.6847697888874853e-06, "loss": 98.5384, "step": 85130 }, { "epoch": 0.7043057451296687, "grad_norm": 843.0704956054688, "learning_rate": 2.683519824400693e-06, "loss": 100.6389, "step": 85140 }, { "epoch": 0.7043884683790379, "grad_norm": 1235.5345458984375, "learning_rate": 2.6822700442255965e-06, "loss": 93.4948, "step": 85150 }, { "epoch": 0.7044711916284072, "grad_norm": 1300.0670166015625, "learning_rate": 2.681020448461634e-06, "loss": 77.3637, "step": 85160 }, { "epoch": 0.7045539148777764, "grad_norm": 2526.052734375, "learning_rate": 2.679771037208234e-06, "loss": 179.5842, "step": 85170 }, { "epoch": 0.7046366381271456, "grad_norm": 994.899169921875, "learning_rate": 2.678521810564804e-06, "loss": 68.4581, "step": 85180 }, { "epoch": 0.7047193613765149, "grad_norm": 894.9552612304688, "learning_rate": 2.6772727686307398e-06, "loss": 98.303, "step": 85190 }, { "epoch": 0.7048020846258841, "grad_norm": 1177.982421875, "learning_rate": 2.676023911505423e-06, "loss": 82.6399, "step": 85200 }, { "epoch": 0.7048848078752533, "grad_norm": 1218.446533203125, "learning_rate": 2.674775239288216e-06, "loss": 101.0908, "step": 85210 }, { "epoch": 0.7049675311246226, "grad_norm": 1381.9801025390625, "learning_rate": 2.673526752078472e-06, "loss": 102.3884, "step": 85220 }, { "epoch": 0.7050502543739918, "grad_norm": 900.25634765625, "learning_rate": 2.6722784499755273e-06, "loss": 70.0581, "step": 85230 }, { "epoch": 0.705132977623361, "grad_norm": 407.5909729003906, "learning_rate": 2.6710303330787035e-06, "loss": 83.3081, "step": 85240 }, { "epoch": 0.7052157008727303, "grad_norm": 914.4041137695312, "learning_rate": 2.6697824014873076e-06, "loss": 82.6285, "step": 85250 }, { "epoch": 0.7052984241220995, "grad_norm": 731.021240234375, "learning_rate": 2.6685346553006293e-06, "loss": 78.5455, "step": 85260 }, { "epoch": 0.7053811473714687, "grad_norm": 1125.2713623046875, "learning_rate": 2.6672870946179506e-06, "loss": 105.0443, "step": 85270 }, { "epoch": 0.705463870620838, "grad_norm": 688.19140625, "learning_rate": 2.6660397195385344e-06, "loss": 114.3066, "step": 85280 }, { "epoch": 0.7055465938702072, "grad_norm": 1272.36083984375, "learning_rate": 2.664792530161624e-06, "loss": 86.4369, "step": 85290 }, { "epoch": 0.7056293171195764, "grad_norm": 1196.80224609375, "learning_rate": 2.6635455265864553e-06, "loss": 107.9678, "step": 85300 }, { "epoch": 0.7057120403689456, "grad_norm": 773.6207275390625, "learning_rate": 2.662298708912246e-06, "loss": 97.1165, "step": 85310 }, { "epoch": 0.705794763618315, "grad_norm": 636.7437744140625, "learning_rate": 2.6610520772382e-06, "loss": 81.5361, "step": 85320 }, { "epoch": 0.7058774868676841, "grad_norm": 875.7274780273438, "learning_rate": 2.659805631663505e-06, "loss": 78.7864, "step": 85330 }, { "epoch": 0.7059602101170533, "grad_norm": 1031.9066162109375, "learning_rate": 2.658559372287337e-06, "loss": 92.2199, "step": 85340 }, { "epoch": 0.7060429333664227, "grad_norm": 583.7274780273438, "learning_rate": 2.6573132992088534e-06, "loss": 63.5923, "step": 85350 }, { "epoch": 0.7061256566157919, "grad_norm": 890.5264282226562, "learning_rate": 2.656067412527197e-06, "loss": 84.5146, "step": 85360 }, { "epoch": 0.706208379865161, "grad_norm": 744.9577026367188, "learning_rate": 2.6548217123415033e-06, "loss": 90.214, "step": 85370 }, { "epoch": 0.7062911031145304, "grad_norm": 792.3532104492188, "learning_rate": 2.6535761987508813e-06, "loss": 79.4676, "step": 85380 }, { "epoch": 0.7063738263638996, "grad_norm": 739.8575439453125, "learning_rate": 2.652330871854433e-06, "loss": 90.5693, "step": 85390 }, { "epoch": 0.7064565496132688, "grad_norm": 450.8844909667969, "learning_rate": 2.651085731751242e-06, "loss": 90.0703, "step": 85400 }, { "epoch": 0.7065392728626381, "grad_norm": 1677.9515380859375, "learning_rate": 2.6498407785403794e-06, "loss": 93.2693, "step": 85410 }, { "epoch": 0.7066219961120073, "grad_norm": 899.0578002929688, "learning_rate": 2.648596012320901e-06, "loss": 79.1233, "step": 85420 }, { "epoch": 0.7067047193613765, "grad_norm": 735.9163208007812, "learning_rate": 2.647351433191846e-06, "loss": 90.7922, "step": 85430 }, { "epoch": 0.7067874426107458, "grad_norm": 951.7572631835938, "learning_rate": 2.64610704125224e-06, "loss": 88.4487, "step": 85440 }, { "epoch": 0.706870165860115, "grad_norm": 1408.339111328125, "learning_rate": 2.644862836601092e-06, "loss": 95.58, "step": 85450 }, { "epoch": 0.7069528891094842, "grad_norm": 850.1331787109375, "learning_rate": 2.6436188193374035e-06, "loss": 96.218, "step": 85460 }, { "epoch": 0.7070356123588535, "grad_norm": 864.1105346679688, "learning_rate": 2.6423749895601494e-06, "loss": 113.5922, "step": 85470 }, { "epoch": 0.7071183356082227, "grad_norm": 969.8984985351562, "learning_rate": 2.6411313473682966e-06, "loss": 98.6914, "step": 85480 }, { "epoch": 0.7072010588575919, "grad_norm": 965.0258178710938, "learning_rate": 2.6398878928607973e-06, "loss": 111.6938, "step": 85490 }, { "epoch": 0.7072837821069612, "grad_norm": 2024.0552978515625, "learning_rate": 2.6386446261365874e-06, "loss": 122.5149, "step": 85500 }, { "epoch": 0.7073665053563304, "grad_norm": 342.0011291503906, "learning_rate": 2.6374015472945868e-06, "loss": 109.7086, "step": 85510 }, { "epoch": 0.7074492286056996, "grad_norm": 848.0896606445312, "learning_rate": 2.6361586564337023e-06, "loss": 91.5193, "step": 85520 }, { "epoch": 0.7075319518550689, "grad_norm": 559.4988403320312, "learning_rate": 2.6349159536528245e-06, "loss": 93.1284, "step": 85530 }, { "epoch": 0.7076146751044381, "grad_norm": 870.3743896484375, "learning_rate": 2.633673439050831e-06, "loss": 80.7735, "step": 85540 }, { "epoch": 0.7076973983538073, "grad_norm": 547.6781616210938, "learning_rate": 2.6324311127265812e-06, "loss": 106.9985, "step": 85550 }, { "epoch": 0.7077801216031766, "grad_norm": 691.0943603515625, "learning_rate": 2.6311889747789225e-06, "loss": 102.5531, "step": 85560 }, { "epoch": 0.7078628448525458, "grad_norm": 845.0540771484375, "learning_rate": 2.6299470253066863e-06, "loss": 86.6591, "step": 85570 }, { "epoch": 0.707945568101915, "grad_norm": 847.3442993164062, "learning_rate": 2.628705264408687e-06, "loss": 106.6626, "step": 85580 }, { "epoch": 0.7080282913512843, "grad_norm": 1003.5369262695312, "learning_rate": 2.6274636921837272e-06, "loss": 106.0019, "step": 85590 }, { "epoch": 0.7081110146006535, "grad_norm": 774.9839477539062, "learning_rate": 2.626222308730594e-06, "loss": 88.3581, "step": 85600 }, { "epoch": 0.7081937378500227, "grad_norm": 854.4312133789062, "learning_rate": 2.6249811141480564e-06, "loss": 89.8101, "step": 85610 }, { "epoch": 0.708276461099392, "grad_norm": 445.865234375, "learning_rate": 2.6237401085348723e-06, "loss": 62.2433, "step": 85620 }, { "epoch": 0.7083591843487612, "grad_norm": 720.7432250976562, "learning_rate": 2.6224992919897817e-06, "loss": 99.8638, "step": 85630 }, { "epoch": 0.7084419075981304, "grad_norm": 997.8077392578125, "learning_rate": 2.6212586646115114e-06, "loss": 77.9413, "step": 85640 }, { "epoch": 0.7085246308474997, "grad_norm": 1052.6624755859375, "learning_rate": 2.620018226498772e-06, "loss": 104.5083, "step": 85650 }, { "epoch": 0.7086073540968689, "grad_norm": 999.076171875, "learning_rate": 2.61877797775026e-06, "loss": 89.7667, "step": 85660 }, { "epoch": 0.7086900773462381, "grad_norm": 1541.536376953125, "learning_rate": 2.6175379184646565e-06, "loss": 92.7317, "step": 85670 }, { "epoch": 0.7087728005956074, "grad_norm": 876.532470703125, "learning_rate": 2.616298048740626e-06, "loss": 85.8813, "step": 85680 }, { "epoch": 0.7088555238449766, "grad_norm": 896.5637817382812, "learning_rate": 2.6150583686768203e-06, "loss": 120.6416, "step": 85690 }, { "epoch": 0.7089382470943458, "grad_norm": 913.8203125, "learning_rate": 2.6138188783718745e-06, "loss": 100.0601, "step": 85700 }, { "epoch": 0.7090209703437151, "grad_norm": 662.0992431640625, "learning_rate": 2.6125795779244125e-06, "loss": 90.1497, "step": 85710 }, { "epoch": 0.7091036935930843, "grad_norm": 738.2545776367188, "learning_rate": 2.611340467433031e-06, "loss": 94.7059, "step": 85720 }, { "epoch": 0.7091864168424535, "grad_norm": 925.9259643554688, "learning_rate": 2.61010154699633e-06, "loss": 64.9561, "step": 85730 }, { "epoch": 0.7092691400918228, "grad_norm": 657.1807861328125, "learning_rate": 2.6088628167128794e-06, "loss": 81.276, "step": 85740 }, { "epoch": 0.709351863341192, "grad_norm": 703.9519653320312, "learning_rate": 2.607624276681241e-06, "loss": 73.3136, "step": 85750 }, { "epoch": 0.7094345865905612, "grad_norm": 596.41064453125, "learning_rate": 2.6063859269999594e-06, "loss": 77.5182, "step": 85760 }, { "epoch": 0.7095173098399306, "grad_norm": 327.9614562988281, "learning_rate": 2.605147767767564e-06, "loss": 84.4486, "step": 85770 }, { "epoch": 0.7096000330892998, "grad_norm": 914.2791137695312, "learning_rate": 2.6039097990825703e-06, "loss": 99.9321, "step": 85780 }, { "epoch": 0.709682756338669, "grad_norm": 758.622802734375, "learning_rate": 2.602672021043477e-06, "loss": 83.2458, "step": 85790 }, { "epoch": 0.7097654795880383, "grad_norm": 801.3850708007812, "learning_rate": 2.601434433748771e-06, "loss": 99.5949, "step": 85800 }, { "epoch": 0.7098482028374075, "grad_norm": 531.544189453125, "learning_rate": 2.600197037296917e-06, "loss": 85.0115, "step": 85810 }, { "epoch": 0.7099309260867767, "grad_norm": 977.3123779296875, "learning_rate": 2.5989598317863694e-06, "loss": 93.289, "step": 85820 }, { "epoch": 0.710013649336146, "grad_norm": 613.4664916992188, "learning_rate": 2.59772281731557e-06, "loss": 80.9545, "step": 85830 }, { "epoch": 0.7100963725855152, "grad_norm": 646.8933715820312, "learning_rate": 2.5964859939829423e-06, "loss": 107.6766, "step": 85840 }, { "epoch": 0.7101790958348844, "grad_norm": 650.1854858398438, "learning_rate": 2.595249361886892e-06, "loss": 75.5614, "step": 85850 }, { "epoch": 0.7102618190842537, "grad_norm": 816.5127563476562, "learning_rate": 2.5940129211258147e-06, "loss": 94.7183, "step": 85860 }, { "epoch": 0.7103445423336229, "grad_norm": 719.228271484375, "learning_rate": 2.5927766717980873e-06, "loss": 90.4481, "step": 85870 }, { "epoch": 0.7104272655829921, "grad_norm": 1630.4447021484375, "learning_rate": 2.5915406140020738e-06, "loss": 99.5369, "step": 85880 }, { "epoch": 0.7105099888323614, "grad_norm": 830.96044921875, "learning_rate": 2.590304747836119e-06, "loss": 106.8879, "step": 85890 }, { "epoch": 0.7105927120817306, "grad_norm": 1144.8453369140625, "learning_rate": 2.5890690733985555e-06, "loss": 93.4202, "step": 85900 }, { "epoch": 0.7106754353310998, "grad_norm": 1315.0010986328125, "learning_rate": 2.5878335907876997e-06, "loss": 75.4638, "step": 85910 }, { "epoch": 0.7107581585804691, "grad_norm": 748.9456176757812, "learning_rate": 2.5865983001018567e-06, "loss": 79.2242, "step": 85920 }, { "epoch": 0.7108408818298383, "grad_norm": 456.5758972167969, "learning_rate": 2.5853632014393108e-06, "loss": 61.4048, "step": 85930 }, { "epoch": 0.7109236050792075, "grad_norm": 565.6151733398438, "learning_rate": 2.584128294898334e-06, "loss": 79.6808, "step": 85940 }, { "epoch": 0.7110063283285768, "grad_norm": 922.67529296875, "learning_rate": 2.5828935805771804e-06, "loss": 141.0256, "step": 85950 }, { "epoch": 0.711089051577946, "grad_norm": 889.4005737304688, "learning_rate": 2.581659058574092e-06, "loss": 67.4175, "step": 85960 }, { "epoch": 0.7111717748273152, "grad_norm": 669.8568725585938, "learning_rate": 2.580424728987296e-06, "loss": 88.6796, "step": 85970 }, { "epoch": 0.7112544980766845, "grad_norm": 1054.4835205078125, "learning_rate": 2.5791905919149973e-06, "loss": 108.1136, "step": 85980 }, { "epoch": 0.7113372213260537, "grad_norm": 1388.4984130859375, "learning_rate": 2.5779566474553934e-06, "loss": 109.7086, "step": 85990 }, { "epoch": 0.7114199445754229, "grad_norm": 624.65673828125, "learning_rate": 2.5767228957066635e-06, "loss": 94.2603, "step": 86000 }, { "epoch": 0.7115026678247922, "grad_norm": 607.4688110351562, "learning_rate": 2.5754893367669697e-06, "loss": 75.7593, "step": 86010 }, { "epoch": 0.7115853910741614, "grad_norm": 1318.2784423828125, "learning_rate": 2.5742559707344638e-06, "loss": 98.0989, "step": 86020 }, { "epoch": 0.7116681143235306, "grad_norm": 647.1195068359375, "learning_rate": 2.573022797707278e-06, "loss": 79.2859, "step": 86030 }, { "epoch": 0.7117508375728998, "grad_norm": 461.97894287109375, "learning_rate": 2.57178981778353e-06, "loss": 101.9, "step": 86040 }, { "epoch": 0.7118335608222691, "grad_norm": 616.1342163085938, "learning_rate": 2.5705570310613215e-06, "loss": 101.6059, "step": 86050 }, { "epoch": 0.7119162840716383, "grad_norm": 997.8457641601562, "learning_rate": 2.5693244376387435e-06, "loss": 83.1273, "step": 86060 }, { "epoch": 0.7119990073210075, "grad_norm": 954.9032592773438, "learning_rate": 2.568092037613862e-06, "loss": 109.8029, "step": 86070 }, { "epoch": 0.7120817305703768, "grad_norm": 704.2922973632812, "learning_rate": 2.566859831084736e-06, "loss": 93.9186, "step": 86080 }, { "epoch": 0.712164453819746, "grad_norm": 826.8761596679688, "learning_rate": 2.5656278181494072e-06, "loss": 85.7292, "step": 86090 }, { "epoch": 0.7122471770691152, "grad_norm": 1042.4456787109375, "learning_rate": 2.5643959989058997e-06, "loss": 103.6534, "step": 86100 }, { "epoch": 0.7123299003184845, "grad_norm": 1412.099365234375, "learning_rate": 2.563164373452224e-06, "loss": 96.1712, "step": 86110 }, { "epoch": 0.7124126235678537, "grad_norm": 846.20947265625, "learning_rate": 2.561932941886377e-06, "loss": 93.9482, "step": 86120 }, { "epoch": 0.7124953468172229, "grad_norm": 650.3298950195312, "learning_rate": 2.560701704306336e-06, "loss": 94.7339, "step": 86130 }, { "epoch": 0.7125780700665922, "grad_norm": 903.107666015625, "learning_rate": 2.5594706608100677e-06, "loss": 68.6888, "step": 86140 }, { "epoch": 0.7126607933159614, "grad_norm": 2324.131591796875, "learning_rate": 2.5582398114955164e-06, "loss": 88.7436, "step": 86150 }, { "epoch": 0.7127435165653306, "grad_norm": 676.3530883789062, "learning_rate": 2.5570091564606182e-06, "loss": 95.5154, "step": 86160 }, { "epoch": 0.7128262398147, "grad_norm": 713.0216064453125, "learning_rate": 2.555778695803288e-06, "loss": 48.4695, "step": 86170 }, { "epoch": 0.7129089630640691, "grad_norm": 877.8251342773438, "learning_rate": 2.554548429621431e-06, "loss": 99.8856, "step": 86180 }, { "epoch": 0.7129916863134383, "grad_norm": 679.6746215820312, "learning_rate": 2.5533183580129317e-06, "loss": 109.55, "step": 86190 }, { "epoch": 0.7130744095628077, "grad_norm": 1079.2734375, "learning_rate": 2.5520884810756614e-06, "loss": 92.1818, "step": 86200 }, { "epoch": 0.7131571328121769, "grad_norm": 1415.2052001953125, "learning_rate": 2.550858798907475e-06, "loss": 98.666, "step": 86210 }, { "epoch": 0.713239856061546, "grad_norm": 498.3135986328125, "learning_rate": 2.5496293116062154e-06, "loss": 100.0309, "step": 86220 }, { "epoch": 0.7133225793109154, "grad_norm": 556.9967651367188, "learning_rate": 2.5484000192697078e-06, "loss": 128.9959, "step": 86230 }, { "epoch": 0.7134053025602846, "grad_norm": 1304.005126953125, "learning_rate": 2.5471709219957573e-06, "loss": 92.9146, "step": 86240 }, { "epoch": 0.7134880258096538, "grad_norm": 667.9055786132812, "learning_rate": 2.5459420198821604e-06, "loss": 96.6319, "step": 86250 }, { "epoch": 0.7135707490590231, "grad_norm": 969.6152954101562, "learning_rate": 2.5447133130266937e-06, "loss": 112.7888, "step": 86260 }, { "epoch": 0.7136534723083923, "grad_norm": 1060.4730224609375, "learning_rate": 2.5434848015271206e-06, "loss": 96.5464, "step": 86270 }, { "epoch": 0.7137361955577615, "grad_norm": 1293.969970703125, "learning_rate": 2.542256485481188e-06, "loss": 81.4303, "step": 86280 }, { "epoch": 0.7138189188071308, "grad_norm": 737.7543334960938, "learning_rate": 2.5410283649866272e-06, "loss": 86.3164, "step": 86290 }, { "epoch": 0.7139016420565, "grad_norm": 494.8547058105469, "learning_rate": 2.539800440141154e-06, "loss": 97.7299, "step": 86300 }, { "epoch": 0.7139843653058692, "grad_norm": 751.9924926757812, "learning_rate": 2.5385727110424697e-06, "loss": 82.0479, "step": 86310 }, { "epoch": 0.7140670885552385, "grad_norm": 854.956298828125, "learning_rate": 2.5373451777882575e-06, "loss": 98.6645, "step": 86320 }, { "epoch": 0.7141498118046077, "grad_norm": 744.7928466796875, "learning_rate": 2.5361178404761876e-06, "loss": 81.0549, "step": 86330 }, { "epoch": 0.7142325350539769, "grad_norm": 546.837646484375, "learning_rate": 2.534890699203914e-06, "loss": 85.5998, "step": 86340 }, { "epoch": 0.7143152583033462, "grad_norm": 846.6171875, "learning_rate": 2.533663754069074e-06, "loss": 82.133, "step": 86350 }, { "epoch": 0.7143979815527154, "grad_norm": 787.9846801757812, "learning_rate": 2.5324370051692905e-06, "loss": 96.9177, "step": 86360 }, { "epoch": 0.7144807048020846, "grad_norm": 494.6238708496094, "learning_rate": 2.5312104526021687e-06, "loss": 90.6376, "step": 86370 }, { "epoch": 0.7145634280514539, "grad_norm": 868.8283081054688, "learning_rate": 2.529984096465302e-06, "loss": 93.8985, "step": 86380 }, { "epoch": 0.7146461513008231, "grad_norm": 971.2010498046875, "learning_rate": 2.528757936856264e-06, "loss": 89.9199, "step": 86390 }, { "epoch": 0.7147288745501923, "grad_norm": 757.8761596679688, "learning_rate": 2.527531973872617e-06, "loss": 74.3093, "step": 86400 }, { "epoch": 0.7148115977995616, "grad_norm": 610.7327270507812, "learning_rate": 2.5263062076119026e-06, "loss": 65.9494, "step": 86410 }, { "epoch": 0.7148943210489308, "grad_norm": 3729.9736328125, "learning_rate": 2.525080638171651e-06, "loss": 91.3989, "step": 86420 }, { "epoch": 0.7149770442983, "grad_norm": 1108.1942138671875, "learning_rate": 2.5238552656493743e-06, "loss": 89.1207, "step": 86430 }, { "epoch": 0.7150597675476693, "grad_norm": 811.8157958984375, "learning_rate": 2.52263009014257e-06, "loss": 111.9879, "step": 86440 }, { "epoch": 0.7151424907970385, "grad_norm": 570.6813354492188, "learning_rate": 2.5214051117487205e-06, "loss": 85.2475, "step": 86450 }, { "epoch": 0.7152252140464077, "grad_norm": 888.626953125, "learning_rate": 2.52018033056529e-06, "loss": 82.2838, "step": 86460 }, { "epoch": 0.715307937295777, "grad_norm": 272.4425964355469, "learning_rate": 2.5189557466897306e-06, "loss": 62.3443, "step": 86470 }, { "epoch": 0.7153906605451462, "grad_norm": 869.8495483398438, "learning_rate": 2.517731360219476e-06, "loss": 92.5027, "step": 86480 }, { "epoch": 0.7154733837945154, "grad_norm": 892.153564453125, "learning_rate": 2.5165071712519447e-06, "loss": 101.1932, "step": 86490 }, { "epoch": 0.7155561070438847, "grad_norm": 1358.9947509765625, "learning_rate": 2.51528317988454e-06, "loss": 110.4798, "step": 86500 }, { "epoch": 0.7156388302932539, "grad_norm": 894.7091674804688, "learning_rate": 2.5140593862146496e-06, "loss": 90.021, "step": 86510 }, { "epoch": 0.7157215535426231, "grad_norm": 682.6785888671875, "learning_rate": 2.512835790339645e-06, "loss": 95.0613, "step": 86520 }, { "epoch": 0.7158042767919924, "grad_norm": 1361.6385498046875, "learning_rate": 2.5116123923568815e-06, "loss": 111.4644, "step": 86530 }, { "epoch": 0.7158870000413616, "grad_norm": 625.406982421875, "learning_rate": 2.5103891923637e-06, "loss": 98.9246, "step": 86540 }, { "epoch": 0.7159697232907308, "grad_norm": 509.01702880859375, "learning_rate": 2.509166190457425e-06, "loss": 85.337, "step": 86550 }, { "epoch": 0.7160524465401001, "grad_norm": 746.6972045898438, "learning_rate": 2.5079433867353646e-06, "loss": 92.2448, "step": 86560 }, { "epoch": 0.7161351697894693, "grad_norm": 1138.3980712890625, "learning_rate": 2.5067207812948123e-06, "loss": 92.1766, "step": 86570 }, { "epoch": 0.7162178930388385, "grad_norm": 1579.0123291015625, "learning_rate": 2.505498374233044e-06, "loss": 92.1637, "step": 86580 }, { "epoch": 0.7163006162882078, "grad_norm": 728.6637573242188, "learning_rate": 2.5042761656473226e-06, "loss": 103.8948, "step": 86590 }, { "epoch": 0.716383339537577, "grad_norm": 946.8383178710938, "learning_rate": 2.503054155634893e-06, "loss": 106.1606, "step": 86600 }, { "epoch": 0.7164660627869462, "grad_norm": 1170.6138916015625, "learning_rate": 2.5018323442929844e-06, "loss": 66.9248, "step": 86610 }, { "epoch": 0.7165487860363156, "grad_norm": 403.2840881347656, "learning_rate": 2.500610731718811e-06, "loss": 96.3434, "step": 86620 }, { "epoch": 0.7166315092856848, "grad_norm": 940.978271484375, "learning_rate": 2.499389318009571e-06, "loss": 102.5182, "step": 86630 }, { "epoch": 0.716714232535054, "grad_norm": 513.7085571289062, "learning_rate": 2.4981681032624473e-06, "loss": 78.1403, "step": 86640 }, { "epoch": 0.7167969557844233, "grad_norm": 1063.4349365234375, "learning_rate": 2.4969470875746055e-06, "loss": 110.9906, "step": 86650 }, { "epoch": 0.7168796790337925, "grad_norm": 577.010986328125, "learning_rate": 2.495726271043198e-06, "loss": 77.6551, "step": 86660 }, { "epoch": 0.7169624022831617, "grad_norm": 457.9773864746094, "learning_rate": 2.4945056537653545e-06, "loss": 65.822, "step": 86670 }, { "epoch": 0.717045125532531, "grad_norm": 985.6693115234375, "learning_rate": 2.493285235838199e-06, "loss": 76.2427, "step": 86680 }, { "epoch": 0.7171278487819002, "grad_norm": 1075.8038330078125, "learning_rate": 2.492065017358834e-06, "loss": 108.8681, "step": 86690 }, { "epoch": 0.7172105720312694, "grad_norm": 618.4483642578125, "learning_rate": 2.4908449984243448e-06, "loss": 81.0635, "step": 86700 }, { "epoch": 0.7172932952806387, "grad_norm": 1000.34521484375, "learning_rate": 2.4896251791318036e-06, "loss": 88.2856, "step": 86710 }, { "epoch": 0.7173760185300079, "grad_norm": 840.3250732421875, "learning_rate": 2.4884055595782666e-06, "loss": 76.9793, "step": 86720 }, { "epoch": 0.7174587417793771, "grad_norm": 1622.5933837890625, "learning_rate": 2.487186139860772e-06, "loss": 99.8122, "step": 86730 }, { "epoch": 0.7175414650287464, "grad_norm": 805.3701171875, "learning_rate": 2.485966920076346e-06, "loss": 88.3674, "step": 86740 }, { "epoch": 0.7176241882781156, "grad_norm": 995.889892578125, "learning_rate": 2.4847479003219926e-06, "loss": 91.9693, "step": 86750 }, { "epoch": 0.7177069115274848, "grad_norm": 657.5465087890625, "learning_rate": 2.4835290806947047e-06, "loss": 78.4104, "step": 86760 }, { "epoch": 0.717789634776854, "grad_norm": 632.3702392578125, "learning_rate": 2.4823104612914578e-06, "loss": 109.4438, "step": 86770 }, { "epoch": 0.7178723580262233, "grad_norm": 484.9309997558594, "learning_rate": 2.4810920422092137e-06, "loss": 92.3368, "step": 86780 }, { "epoch": 0.7179550812755925, "grad_norm": 791.4384155273438, "learning_rate": 2.4798738235449164e-06, "loss": 90.1838, "step": 86790 }, { "epoch": 0.7180378045249617, "grad_norm": 768.782470703125, "learning_rate": 2.478655805395493e-06, "loss": 93.8179, "step": 86800 }, { "epoch": 0.718120527774331, "grad_norm": 603.9031982421875, "learning_rate": 2.477437987857856e-06, "loss": 66.2067, "step": 86810 }, { "epoch": 0.7182032510237002, "grad_norm": 695.658447265625, "learning_rate": 2.4762203710289008e-06, "loss": 90.285, "step": 86820 }, { "epoch": 0.7182859742730694, "grad_norm": 688.7564697265625, "learning_rate": 2.4750029550055098e-06, "loss": 104.6844, "step": 86830 }, { "epoch": 0.7183686975224387, "grad_norm": 594.9547729492188, "learning_rate": 2.473785739884544e-06, "loss": 79.156, "step": 86840 }, { "epoch": 0.7184514207718079, "grad_norm": 776.5545043945312, "learning_rate": 2.4725687257628533e-06, "loss": 110.7488, "step": 86850 }, { "epoch": 0.7185341440211771, "grad_norm": 1479.708740234375, "learning_rate": 2.47135191273727e-06, "loss": 157.2341, "step": 86860 }, { "epoch": 0.7186168672705464, "grad_norm": 855.310302734375, "learning_rate": 2.4701353009046075e-06, "loss": 98.2323, "step": 86870 }, { "epoch": 0.7186995905199156, "grad_norm": 1696.113525390625, "learning_rate": 2.4689188903616707e-06, "loss": 87.6517, "step": 86880 }, { "epoch": 0.7187823137692848, "grad_norm": 622.81787109375, "learning_rate": 2.467702681205241e-06, "loss": 83.5084, "step": 86890 }, { "epoch": 0.7188650370186541, "grad_norm": 815.8912353515625, "learning_rate": 2.4664866735320886e-06, "loss": 93.1341, "step": 86900 }, { "epoch": 0.7189477602680233, "grad_norm": 599.5869140625, "learning_rate": 2.4652708674389636e-06, "loss": 106.3732, "step": 86910 }, { "epoch": 0.7190304835173925, "grad_norm": 917.40380859375, "learning_rate": 2.464055263022605e-06, "loss": 90.2012, "step": 86920 }, { "epoch": 0.7191132067667618, "grad_norm": 765.8475952148438, "learning_rate": 2.462839860379729e-06, "loss": 76.7954, "step": 86930 }, { "epoch": 0.719195930016131, "grad_norm": 1118.771240234375, "learning_rate": 2.46162465960704e-06, "loss": 88.3979, "step": 86940 }, { "epoch": 0.7192786532655002, "grad_norm": 841.2666625976562, "learning_rate": 2.460409660801229e-06, "loss": 77.0938, "step": 86950 }, { "epoch": 0.7193613765148695, "grad_norm": 579.8666381835938, "learning_rate": 2.459194864058963e-06, "loss": 81.2424, "step": 86960 }, { "epoch": 0.7194440997642387, "grad_norm": 2329.31884765625, "learning_rate": 2.457980269476903e-06, "loss": 117.6933, "step": 86970 }, { "epoch": 0.7195268230136079, "grad_norm": 662.1182250976562, "learning_rate": 2.4567658771516876e-06, "loss": 80.9285, "step": 86980 }, { "epoch": 0.7196095462629772, "grad_norm": 798.1659545898438, "learning_rate": 2.455551687179939e-06, "loss": 75.5553, "step": 86990 }, { "epoch": 0.7196922695123464, "grad_norm": 685.1201171875, "learning_rate": 2.454337699658267e-06, "loss": 100.3396, "step": 87000 }, { "epoch": 0.7197749927617156, "grad_norm": 951.0033569335938, "learning_rate": 2.453123914683259e-06, "loss": 96.5214, "step": 87010 }, { "epoch": 0.719857716011085, "grad_norm": 571.2054443359375, "learning_rate": 2.4519103323514932e-06, "loss": 74.3453, "step": 87020 }, { "epoch": 0.7199404392604541, "grad_norm": 1552.3475341796875, "learning_rate": 2.4506969527595277e-06, "loss": 86.0868, "step": 87030 }, { "epoch": 0.7200231625098233, "grad_norm": 716.04931640625, "learning_rate": 2.4494837760039057e-06, "loss": 91.2232, "step": 87040 }, { "epoch": 0.7201058857591927, "grad_norm": 1257.593994140625, "learning_rate": 2.4482708021811546e-06, "loss": 86.8641, "step": 87050 }, { "epoch": 0.7201886090085619, "grad_norm": 950.4786376953125, "learning_rate": 2.4470580313877833e-06, "loss": 97.9864, "step": 87060 }, { "epoch": 0.720271332257931, "grad_norm": 1004.8903198242188, "learning_rate": 2.44584546372029e-06, "loss": 88.7277, "step": 87070 }, { "epoch": 0.7203540555073004, "grad_norm": 937.1966552734375, "learning_rate": 2.4446330992751504e-06, "loss": 93.3525, "step": 87080 }, { "epoch": 0.7204367787566696, "grad_norm": 994.6897583007812, "learning_rate": 2.44342093814883e-06, "loss": 79.5034, "step": 87090 }, { "epoch": 0.7205195020060388, "grad_norm": 663.3712768554688, "learning_rate": 2.442208980437771e-06, "loss": 119.1683, "step": 87100 }, { "epoch": 0.7206022252554081, "grad_norm": 734.4451904296875, "learning_rate": 2.4409972262384037e-06, "loss": 82.272, "step": 87110 }, { "epoch": 0.7206849485047773, "grad_norm": 511.676513671875, "learning_rate": 2.4397856756471435e-06, "loss": 75.2757, "step": 87120 }, { "epoch": 0.7207676717541465, "grad_norm": 1072.48046875, "learning_rate": 2.438574328760387e-06, "loss": 118.7081, "step": 87130 }, { "epoch": 0.7208503950035158, "grad_norm": 1304.2197265625, "learning_rate": 2.437363185674516e-06, "loss": 65.1885, "step": 87140 }, { "epoch": 0.720933118252885, "grad_norm": 734.7349243164062, "learning_rate": 2.4361522464858956e-06, "loss": 75.8047, "step": 87150 }, { "epoch": 0.7210158415022542, "grad_norm": 775.7440795898438, "learning_rate": 2.434941511290872e-06, "loss": 84.4206, "step": 87160 }, { "epoch": 0.7210985647516235, "grad_norm": 846.2425537109375, "learning_rate": 2.4337309801857846e-06, "loss": 75.5298, "step": 87170 }, { "epoch": 0.7211812880009927, "grad_norm": 950.1725463867188, "learning_rate": 2.432520653266943e-06, "loss": 137.4889, "step": 87180 }, { "epoch": 0.7212640112503619, "grad_norm": 1078.6646728515625, "learning_rate": 2.4313105306306505e-06, "loss": 104.9158, "step": 87190 }, { "epoch": 0.7213467344997312, "grad_norm": 807.844970703125, "learning_rate": 2.4301006123731908e-06, "loss": 86.7134, "step": 87200 }, { "epoch": 0.7214294577491004, "grad_norm": 1981.1280517578125, "learning_rate": 2.4288908985908304e-06, "loss": 90.906, "step": 87210 }, { "epoch": 0.7215121809984696, "grad_norm": 573.0137939453125, "learning_rate": 2.4276813893798212e-06, "loss": 86.2283, "step": 87220 }, { "epoch": 0.7215949042478389, "grad_norm": 1042.208984375, "learning_rate": 2.4264720848363992e-06, "loss": 79.0382, "step": 87230 }, { "epoch": 0.7216776274972081, "grad_norm": 667.932861328125, "learning_rate": 2.4252629850567823e-06, "loss": 80.4661, "step": 87240 }, { "epoch": 0.7217603507465773, "grad_norm": 869.255615234375, "learning_rate": 2.4240540901371727e-06, "loss": 93.8904, "step": 87250 }, { "epoch": 0.7218430739959466, "grad_norm": 1087.067626953125, "learning_rate": 2.4228454001737576e-06, "loss": 70.2305, "step": 87260 }, { "epoch": 0.7219257972453158, "grad_norm": 853.7138061523438, "learning_rate": 2.421636915262707e-06, "loss": 82.3333, "step": 87270 }, { "epoch": 0.722008520494685, "grad_norm": 575.6585083007812, "learning_rate": 2.420428635500173e-06, "loss": 71.9151, "step": 87280 }, { "epoch": 0.7220912437440543, "grad_norm": 846.3626708984375, "learning_rate": 2.419220560982294e-06, "loss": 111.3039, "step": 87290 }, { "epoch": 0.7221739669934235, "grad_norm": 1180.4281005859375, "learning_rate": 2.418012691805191e-06, "loss": 96.9584, "step": 87300 }, { "epoch": 0.7222566902427927, "grad_norm": 992.0172119140625, "learning_rate": 2.4168050280649686e-06, "loss": 136.7477, "step": 87310 }, { "epoch": 0.722339413492162, "grad_norm": 1166.8099365234375, "learning_rate": 2.4155975698577146e-06, "loss": 88.4807, "step": 87320 }, { "epoch": 0.7224221367415312, "grad_norm": 758.3084106445312, "learning_rate": 2.4143903172795014e-06, "loss": 73.7974, "step": 87330 }, { "epoch": 0.7225048599909004, "grad_norm": 805.8015747070312, "learning_rate": 2.4131832704263842e-06, "loss": 91.9121, "step": 87340 }, { "epoch": 0.7225875832402697, "grad_norm": 889.5006103515625, "learning_rate": 2.411976429394402e-06, "loss": 82.7861, "step": 87350 }, { "epoch": 0.7226703064896389, "grad_norm": 859.7445068359375, "learning_rate": 2.4107697942795782e-06, "loss": 86.8242, "step": 87360 }, { "epoch": 0.7227530297390081, "grad_norm": 1024.7366943359375, "learning_rate": 2.4095633651779186e-06, "loss": 75.2, "step": 87370 }, { "epoch": 0.7228357529883774, "grad_norm": 732.5393676757812, "learning_rate": 2.4083571421854137e-06, "loss": 83.1405, "step": 87380 }, { "epoch": 0.7229184762377466, "grad_norm": 754.288818359375, "learning_rate": 2.407151125398037e-06, "loss": 87.5245, "step": 87390 }, { "epoch": 0.7230011994871158, "grad_norm": 859.1074829101562, "learning_rate": 2.405945314911746e-06, "loss": 90.8254, "step": 87400 }, { "epoch": 0.7230839227364851, "grad_norm": 841.62744140625, "learning_rate": 2.4047397108224807e-06, "loss": 84.0488, "step": 87410 }, { "epoch": 0.7231666459858543, "grad_norm": 1061.734375, "learning_rate": 2.403534313226166e-06, "loss": 95.578, "step": 87420 }, { "epoch": 0.7232493692352235, "grad_norm": 678.4471435546875, "learning_rate": 2.40232912221871e-06, "loss": 119.1306, "step": 87430 }, { "epoch": 0.7233320924845928, "grad_norm": 1203.9510498046875, "learning_rate": 2.4011241378960037e-06, "loss": 87.4557, "step": 87440 }, { "epoch": 0.723414815733962, "grad_norm": 475.9383239746094, "learning_rate": 2.3999193603539234e-06, "loss": 83.3428, "step": 87450 }, { "epoch": 0.7234975389833312, "grad_norm": 1285.626708984375, "learning_rate": 2.3987147896883263e-06, "loss": 116.1732, "step": 87460 }, { "epoch": 0.7235802622327006, "grad_norm": 1077.05078125, "learning_rate": 2.397510425995055e-06, "loss": 82.0933, "step": 87470 }, { "epoch": 0.7236629854820698, "grad_norm": 661.8814697265625, "learning_rate": 2.3963062693699353e-06, "loss": 78.9931, "step": 87480 }, { "epoch": 0.723745708731439, "grad_norm": 866.9628295898438, "learning_rate": 2.3951023199087763e-06, "loss": 92.3042, "step": 87490 }, { "epoch": 0.7238284319808082, "grad_norm": 463.5540771484375, "learning_rate": 2.393898577707371e-06, "loss": 90.7406, "step": 87500 }, { "epoch": 0.7239111552301775, "grad_norm": 667.8403930664062, "learning_rate": 2.392695042861495e-06, "loss": 99.6054, "step": 87510 }, { "epoch": 0.7239938784795467, "grad_norm": 685.7771606445312, "learning_rate": 2.391491715466909e-06, "loss": 75.8269, "step": 87520 }, { "epoch": 0.7240766017289159, "grad_norm": 666.2769775390625, "learning_rate": 2.390288595619356e-06, "loss": 74.8925, "step": 87530 }, { "epoch": 0.7241593249782852, "grad_norm": 1205.9483642578125, "learning_rate": 2.3890856834145625e-06, "loss": 104.4413, "step": 87540 }, { "epoch": 0.7242420482276544, "grad_norm": 823.16748046875, "learning_rate": 2.3878829789482385e-06, "loss": 112.2172, "step": 87550 }, { "epoch": 0.7243247714770236, "grad_norm": 675.4729614257812, "learning_rate": 2.3866804823160776e-06, "loss": 71.6798, "step": 87560 }, { "epoch": 0.7244074947263929, "grad_norm": 931.8217163085938, "learning_rate": 2.385478193613758e-06, "loss": 102.3784, "step": 87570 }, { "epoch": 0.7244902179757621, "grad_norm": 906.8097534179688, "learning_rate": 2.3842761129369387e-06, "loss": 97.9349, "step": 87580 }, { "epoch": 0.7245729412251313, "grad_norm": 899.681884765625, "learning_rate": 2.3830742403812646e-06, "loss": 72.5381, "step": 87590 }, { "epoch": 0.7246556644745006, "grad_norm": 828.892822265625, "learning_rate": 2.381872576042365e-06, "loss": 87.6623, "step": 87600 }, { "epoch": 0.7247383877238698, "grad_norm": 1114.9024658203125, "learning_rate": 2.3806711200158473e-06, "loss": 94.0342, "step": 87610 }, { "epoch": 0.724821110973239, "grad_norm": 899.71630859375, "learning_rate": 2.3794698723973057e-06, "loss": 63.5924, "step": 87620 }, { "epoch": 0.7249038342226083, "grad_norm": 1092.0023193359375, "learning_rate": 2.3782688332823212e-06, "loss": 83.4462, "step": 87630 }, { "epoch": 0.7249865574719775, "grad_norm": 982.8212280273438, "learning_rate": 2.3770680027664537e-06, "loss": 82.2134, "step": 87640 }, { "epoch": 0.7250692807213467, "grad_norm": 1915.584228515625, "learning_rate": 2.3758673809452484e-06, "loss": 76.1468, "step": 87650 }, { "epoch": 0.725152003970716, "grad_norm": 647.2635498046875, "learning_rate": 2.3746669679142315e-06, "loss": 87.5242, "step": 87660 }, { "epoch": 0.7252347272200852, "grad_norm": 834.3574829101562, "learning_rate": 2.373466763768915e-06, "loss": 96.0043, "step": 87670 }, { "epoch": 0.7253174504694544, "grad_norm": 553.946044921875, "learning_rate": 2.3722667686047945e-06, "loss": 73.195, "step": 87680 }, { "epoch": 0.7254001737188237, "grad_norm": 719.322998046875, "learning_rate": 2.37106698251735e-06, "loss": 87.8059, "step": 87690 }, { "epoch": 0.7254828969681929, "grad_norm": 1009.9353637695312, "learning_rate": 2.3698674056020378e-06, "loss": 86.6256, "step": 87700 }, { "epoch": 0.7255656202175621, "grad_norm": 732.2600708007812, "learning_rate": 2.3686680379543057e-06, "loss": 105.3399, "step": 87710 }, { "epoch": 0.7256483434669314, "grad_norm": 1122.77783203125, "learning_rate": 2.36746887966958e-06, "loss": 75.9752, "step": 87720 }, { "epoch": 0.7257310667163006, "grad_norm": 834.73388671875, "learning_rate": 2.366269930843275e-06, "loss": 101.7426, "step": 87730 }, { "epoch": 0.7258137899656698, "grad_norm": 1176.048583984375, "learning_rate": 2.3650711915707852e-06, "loss": 89.6517, "step": 87740 }, { "epoch": 0.7258965132150391, "grad_norm": 902.6798706054688, "learning_rate": 2.363872661947488e-06, "loss": 94.8531, "step": 87750 }, { "epoch": 0.7259792364644083, "grad_norm": 875.5723876953125, "learning_rate": 2.362674342068744e-06, "loss": 98.4001, "step": 87760 }, { "epoch": 0.7260619597137775, "grad_norm": 1041.4979248046875, "learning_rate": 2.3614762320299e-06, "loss": 86.5132, "step": 87770 }, { "epoch": 0.7261446829631468, "grad_norm": 634.3117065429688, "learning_rate": 2.3602783319262847e-06, "loss": 88.5851, "step": 87780 }, { "epoch": 0.726227406212516, "grad_norm": 468.2691955566406, "learning_rate": 2.3590806418532052e-06, "loss": 107.173, "step": 87790 }, { "epoch": 0.7263101294618852, "grad_norm": 743.6190185546875, "learning_rate": 2.3578831619059595e-06, "loss": 75.1913, "step": 87800 }, { "epoch": 0.7263928527112545, "grad_norm": 485.16510009765625, "learning_rate": 2.3566858921798246e-06, "loss": 69.3648, "step": 87810 }, { "epoch": 0.7264755759606237, "grad_norm": 559.470703125, "learning_rate": 2.3554888327700604e-06, "loss": 79.9045, "step": 87820 }, { "epoch": 0.7265582992099929, "grad_norm": 720.0167236328125, "learning_rate": 2.3542919837719154e-06, "loss": 82.7708, "step": 87830 }, { "epoch": 0.7266410224593622, "grad_norm": 1213.031005859375, "learning_rate": 2.3530953452806143e-06, "loss": 86.5757, "step": 87840 }, { "epoch": 0.7267237457087314, "grad_norm": 729.4437866210938, "learning_rate": 2.351898917391369e-06, "loss": 78.6642, "step": 87850 }, { "epoch": 0.7268064689581006, "grad_norm": 938.8148193359375, "learning_rate": 2.350702700199376e-06, "loss": 78.4311, "step": 87860 }, { "epoch": 0.72688919220747, "grad_norm": 770.9730834960938, "learning_rate": 2.3495066937998085e-06, "loss": 83.714, "step": 87870 }, { "epoch": 0.7269719154568391, "grad_norm": 1002.52490234375, "learning_rate": 2.3483108982878294e-06, "loss": 80.0576, "step": 87880 }, { "epoch": 0.7270546387062083, "grad_norm": 594.104248046875, "learning_rate": 2.3471153137585823e-06, "loss": 66.6908, "step": 87890 }, { "epoch": 0.7271373619555777, "grad_norm": 819.463134765625, "learning_rate": 2.345919940307195e-06, "loss": 78.4313, "step": 87900 }, { "epoch": 0.7272200852049469, "grad_norm": 1145.914306640625, "learning_rate": 2.3447247780287746e-06, "loss": 127.2008, "step": 87910 }, { "epoch": 0.727302808454316, "grad_norm": 698.8175048828125, "learning_rate": 2.3435298270184204e-06, "loss": 99.664, "step": 87920 }, { "epoch": 0.7273855317036854, "grad_norm": 763.3477172851562, "learning_rate": 2.3423350873712057e-06, "loss": 84.8625, "step": 87930 }, { "epoch": 0.7274682549530546, "grad_norm": 1032.08349609375, "learning_rate": 2.341140559182192e-06, "loss": 83.9295, "step": 87940 }, { "epoch": 0.7275509782024238, "grad_norm": 821.6600341796875, "learning_rate": 2.339946242546422e-06, "loss": 84.5949, "step": 87950 }, { "epoch": 0.7276337014517931, "grad_norm": 1196.482421875, "learning_rate": 2.3387521375589205e-06, "loss": 103.7103, "step": 87960 }, { "epoch": 0.7277164247011623, "grad_norm": 939.4242553710938, "learning_rate": 2.3375582443146977e-06, "loss": 97.3099, "step": 87970 }, { "epoch": 0.7277991479505315, "grad_norm": 656.9828491210938, "learning_rate": 2.3363645629087467e-06, "loss": 77.6787, "step": 87980 }, { "epoch": 0.7278818711999008, "grad_norm": 467.5138854980469, "learning_rate": 2.3351710934360426e-06, "loss": 96.5642, "step": 87990 }, { "epoch": 0.72796459444927, "grad_norm": 705.623046875, "learning_rate": 2.333977835991545e-06, "loss": 90.1867, "step": 88000 }, { "epoch": 0.7280473176986392, "grad_norm": 1090.185302734375, "learning_rate": 2.3327847906701932e-06, "loss": 85.2803, "step": 88010 }, { "epoch": 0.7281300409480085, "grad_norm": 793.4896240234375, "learning_rate": 2.3315919575669172e-06, "loss": 94.3953, "step": 88020 }, { "epoch": 0.7282127641973777, "grad_norm": 584.8360595703125, "learning_rate": 2.330399336776625e-06, "loss": 111.1713, "step": 88030 }, { "epoch": 0.7282954874467469, "grad_norm": 953.3636474609375, "learning_rate": 2.329206928394203e-06, "loss": 98.6218, "step": 88040 }, { "epoch": 0.7283782106961162, "grad_norm": 915.4071655273438, "learning_rate": 2.3280147325145285e-06, "loss": 115.2945, "step": 88050 }, { "epoch": 0.7284609339454854, "grad_norm": 500.0581970214844, "learning_rate": 2.3268227492324594e-06, "loss": 102.7827, "step": 88060 }, { "epoch": 0.7285436571948546, "grad_norm": 864.3734130859375, "learning_rate": 2.325630978642836e-06, "loss": 87.891, "step": 88070 }, { "epoch": 0.7286263804442239, "grad_norm": 819.2616577148438, "learning_rate": 2.3244394208404816e-06, "loss": 80.8149, "step": 88080 }, { "epoch": 0.7287091036935931, "grad_norm": 1025.54443359375, "learning_rate": 2.3232480759202035e-06, "loss": 112.5396, "step": 88090 }, { "epoch": 0.7287918269429623, "grad_norm": 786.0123291015625, "learning_rate": 2.3220569439767907e-06, "loss": 95.3773, "step": 88100 }, { "epoch": 0.7288745501923316, "grad_norm": 1449.097900390625, "learning_rate": 2.320866025105016e-06, "loss": 100.4124, "step": 88110 }, { "epoch": 0.7289572734417008, "grad_norm": 668.387939453125, "learning_rate": 2.319675319399639e-06, "loss": 90.4953, "step": 88120 }, { "epoch": 0.72903999669107, "grad_norm": 753.0546264648438, "learning_rate": 2.3184848269553944e-06, "loss": 94.4148, "step": 88130 }, { "epoch": 0.7291227199404393, "grad_norm": 1106.7774658203125, "learning_rate": 2.3172945478670056e-06, "loss": 91.0953, "step": 88140 }, { "epoch": 0.7292054431898085, "grad_norm": 582.337646484375, "learning_rate": 2.316104482229178e-06, "loss": 84.3528, "step": 88150 }, { "epoch": 0.7292881664391777, "grad_norm": 1405.619873046875, "learning_rate": 2.314914630136599e-06, "loss": 76.6839, "step": 88160 }, { "epoch": 0.729370889688547, "grad_norm": 870.0715942382812, "learning_rate": 2.3137249916839394e-06, "loss": 72.3327, "step": 88170 }, { "epoch": 0.7294536129379162, "grad_norm": 719.5054931640625, "learning_rate": 2.3125355669658547e-06, "loss": 90.0703, "step": 88180 }, { "epoch": 0.7295363361872854, "grad_norm": 780.942138671875, "learning_rate": 2.3113463560769807e-06, "loss": 109.0758, "step": 88190 }, { "epoch": 0.7296190594366547, "grad_norm": 927.2631225585938, "learning_rate": 2.310157359111938e-06, "loss": 97.629, "step": 88200 }, { "epoch": 0.7297017826860239, "grad_norm": 558.8131103515625, "learning_rate": 2.3089685761653296e-06, "loss": 102.3254, "step": 88210 }, { "epoch": 0.7297845059353931, "grad_norm": 982.3890380859375, "learning_rate": 2.3077800073317415e-06, "loss": 102.417, "step": 88220 }, { "epoch": 0.7298672291847623, "grad_norm": 754.8167724609375, "learning_rate": 2.3065916527057426e-06, "loss": 88.5576, "step": 88230 }, { "epoch": 0.7299499524341316, "grad_norm": 659.7408447265625, "learning_rate": 2.305403512381884e-06, "loss": 85.5432, "step": 88240 }, { "epoch": 0.7300326756835008, "grad_norm": 838.64697265625, "learning_rate": 2.3042155864547024e-06, "loss": 78.8474, "step": 88250 }, { "epoch": 0.73011539893287, "grad_norm": 750.2058715820312, "learning_rate": 2.303027875018714e-06, "loss": 88.2092, "step": 88260 }, { "epoch": 0.7301981221822393, "grad_norm": 926.1990966796875, "learning_rate": 2.3018403781684205e-06, "loss": 79.4068, "step": 88270 }, { "epoch": 0.7302808454316085, "grad_norm": 645.7316284179688, "learning_rate": 2.3006530959983055e-06, "loss": 67.1499, "step": 88280 }, { "epoch": 0.7303635686809777, "grad_norm": 733.4593505859375, "learning_rate": 2.299466028602835e-06, "loss": 58.1151, "step": 88290 }, { "epoch": 0.730446291930347, "grad_norm": 1105.638671875, "learning_rate": 2.298279176076459e-06, "loss": 87.0662, "step": 88300 }, { "epoch": 0.7305290151797162, "grad_norm": 1261.8275146484375, "learning_rate": 2.2970925385136093e-06, "loss": 91.2717, "step": 88310 }, { "epoch": 0.7306117384290854, "grad_norm": 857.1454467773438, "learning_rate": 2.295906116008702e-06, "loss": 77.8175, "step": 88320 }, { "epoch": 0.7306944616784548, "grad_norm": 852.4544067382812, "learning_rate": 2.2947199086561346e-06, "loss": 92.5309, "step": 88330 }, { "epoch": 0.730777184927824, "grad_norm": 1291.9940185546875, "learning_rate": 2.293533916550289e-06, "loss": 93.7867, "step": 88340 }, { "epoch": 0.7308599081771932, "grad_norm": 805.106201171875, "learning_rate": 2.292348139785528e-06, "loss": 91.1033, "step": 88350 }, { "epoch": 0.7309426314265625, "grad_norm": 1165.837890625, "learning_rate": 2.2911625784562e-06, "loss": 102.6606, "step": 88360 }, { "epoch": 0.7310253546759317, "grad_norm": 1081.798095703125, "learning_rate": 2.2899772326566327e-06, "loss": 115.492, "step": 88370 }, { "epoch": 0.7311080779253009, "grad_norm": 2020.0845947265625, "learning_rate": 2.2887921024811405e-06, "loss": 91.6865, "step": 88380 }, { "epoch": 0.7311908011746702, "grad_norm": 739.7776489257812, "learning_rate": 2.2876071880240174e-06, "loss": 87.2011, "step": 88390 }, { "epoch": 0.7312735244240394, "grad_norm": 1269.20751953125, "learning_rate": 2.2864224893795423e-06, "loss": 90.7345, "step": 88400 }, { "epoch": 0.7313562476734086, "grad_norm": 859.2783203125, "learning_rate": 2.285238006641976e-06, "loss": 100.769, "step": 88410 }, { "epoch": 0.7314389709227779, "grad_norm": 957.8426513671875, "learning_rate": 2.284053739905563e-06, "loss": 106.6209, "step": 88420 }, { "epoch": 0.7315216941721471, "grad_norm": 1027.73095703125, "learning_rate": 2.282869689264529e-06, "loss": 90.6714, "step": 88430 }, { "epoch": 0.7316044174215163, "grad_norm": 711.40576171875, "learning_rate": 2.2816858548130837e-06, "loss": 69.7044, "step": 88440 }, { "epoch": 0.7316871406708856, "grad_norm": 721.7193603515625, "learning_rate": 2.28050223664542e-06, "loss": 75.2567, "step": 88450 }, { "epoch": 0.7317698639202548, "grad_norm": 1569.3946533203125, "learning_rate": 2.2793188348557136e-06, "loss": 88.7201, "step": 88460 }, { "epoch": 0.731852587169624, "grad_norm": 997.7343139648438, "learning_rate": 2.2781356495381186e-06, "loss": 142.2485, "step": 88470 }, { "epoch": 0.7319353104189933, "grad_norm": 973.0025634765625, "learning_rate": 2.276952680786779e-06, "loss": 96.4867, "step": 88480 }, { "epoch": 0.7320180336683625, "grad_norm": 626.2188110351562, "learning_rate": 2.2757699286958186e-06, "loss": 77.2272, "step": 88490 }, { "epoch": 0.7321007569177317, "grad_norm": 411.275146484375, "learning_rate": 2.274587393359342e-06, "loss": 80.0432, "step": 88500 }, { "epoch": 0.732183480167101, "grad_norm": 534.1046142578125, "learning_rate": 2.273405074871438e-06, "loss": 84.1023, "step": 88510 }, { "epoch": 0.7322662034164702, "grad_norm": 793.4088745117188, "learning_rate": 2.2722229733261795e-06, "loss": 70.7006, "step": 88520 }, { "epoch": 0.7323489266658394, "grad_norm": 710.7265014648438, "learning_rate": 2.2710410888176205e-06, "loss": 67.0532, "step": 88530 }, { "epoch": 0.7324316499152087, "grad_norm": 667.3509521484375, "learning_rate": 2.2698594214397966e-06, "loss": 90.6114, "step": 88540 }, { "epoch": 0.7325143731645779, "grad_norm": 981.4866943359375, "learning_rate": 2.268677971286732e-06, "loss": 102.2464, "step": 88550 }, { "epoch": 0.7325970964139471, "grad_norm": 807.9349975585938, "learning_rate": 2.2674967384524237e-06, "loss": 76.0214, "step": 88560 }, { "epoch": 0.7326798196633164, "grad_norm": 629.3372192382812, "learning_rate": 2.2663157230308576e-06, "loss": 127.0412, "step": 88570 }, { "epoch": 0.7327625429126856, "grad_norm": 1088.6207275390625, "learning_rate": 2.2651349251160055e-06, "loss": 87.4096, "step": 88580 }, { "epoch": 0.7328452661620548, "grad_norm": 692.8621215820312, "learning_rate": 2.263954344801816e-06, "loss": 73.1671, "step": 88590 }, { "epoch": 0.7329279894114241, "grad_norm": 654.3468017578125, "learning_rate": 2.2627739821822226e-06, "loss": 78.6952, "step": 88600 }, { "epoch": 0.7330107126607933, "grad_norm": 467.1300964355469, "learning_rate": 2.261593837351141e-06, "loss": 64.8719, "step": 88610 }, { "epoch": 0.7330934359101625, "grad_norm": 724.4471435546875, "learning_rate": 2.26041391040247e-06, "loss": 114.528, "step": 88620 }, { "epoch": 0.7331761591595318, "grad_norm": 960.2774658203125, "learning_rate": 2.259234201430092e-06, "loss": 106.1695, "step": 88630 }, { "epoch": 0.733258882408901, "grad_norm": 810.5166625976562, "learning_rate": 2.2580547105278716e-06, "loss": 80.9764, "step": 88640 }, { "epoch": 0.7333416056582702, "grad_norm": 919.3231811523438, "learning_rate": 2.2568754377896516e-06, "loss": 100.5872, "step": 88650 }, { "epoch": 0.7334243289076395, "grad_norm": 684.9686889648438, "learning_rate": 2.255696383309265e-06, "loss": 85.3732, "step": 88660 }, { "epoch": 0.7335070521570087, "grad_norm": 1018.3485107421875, "learning_rate": 2.2545175471805197e-06, "loss": 82.5631, "step": 88670 }, { "epoch": 0.7335897754063779, "grad_norm": 658.5642700195312, "learning_rate": 2.2533389294972153e-06, "loss": 127.2706, "step": 88680 }, { "epoch": 0.7336724986557472, "grad_norm": 582.38134765625, "learning_rate": 2.2521605303531267e-06, "loss": 81.0251, "step": 88690 }, { "epoch": 0.7337552219051164, "grad_norm": 940.820556640625, "learning_rate": 2.2509823498420142e-06, "loss": 96.1019, "step": 88700 }, { "epoch": 0.7338379451544856, "grad_norm": 1125.545654296875, "learning_rate": 2.2498043880576193e-06, "loss": 93.1604, "step": 88710 }, { "epoch": 0.733920668403855, "grad_norm": 742.275634765625, "learning_rate": 2.2486266450936695e-06, "loss": 69.7256, "step": 88720 }, { "epoch": 0.7340033916532241, "grad_norm": 985.7758178710938, "learning_rate": 2.2474491210438687e-06, "loss": 80.2849, "step": 88730 }, { "epoch": 0.7340861149025933, "grad_norm": 607.6522216796875, "learning_rate": 2.2462718160019086e-06, "loss": 90.2372, "step": 88740 }, { "epoch": 0.7341688381519627, "grad_norm": 690.2867431640625, "learning_rate": 2.245094730061463e-06, "loss": 67.3423, "step": 88750 }, { "epoch": 0.7342515614013319, "grad_norm": 643.0440063476562, "learning_rate": 2.2439178633161855e-06, "loss": 68.1398, "step": 88760 }, { "epoch": 0.734334284650701, "grad_norm": 737.9159545898438, "learning_rate": 2.2427412158597133e-06, "loss": 111.8004, "step": 88770 }, { "epoch": 0.7344170079000704, "grad_norm": 791.5084228515625, "learning_rate": 2.2415647877856706e-06, "loss": 97.5052, "step": 88780 }, { "epoch": 0.7344997311494396, "grad_norm": 746.7666015625, "learning_rate": 2.240388579187658e-06, "loss": 96.2792, "step": 88790 }, { "epoch": 0.7345824543988088, "grad_norm": 920.7720947265625, "learning_rate": 2.2392125901592615e-06, "loss": 74.4708, "step": 88800 }, { "epoch": 0.7346651776481781, "grad_norm": 1364.5985107421875, "learning_rate": 2.23803682079405e-06, "loss": 94.9622, "step": 88810 }, { "epoch": 0.7347479008975473, "grad_norm": 917.6015625, "learning_rate": 2.236861271185572e-06, "loss": 80.0564, "step": 88820 }, { "epoch": 0.7348306241469165, "grad_norm": 1057.025634765625, "learning_rate": 2.2356859414273613e-06, "loss": 73.4881, "step": 88830 }, { "epoch": 0.7349133473962858, "grad_norm": 886.4762573242188, "learning_rate": 2.2345108316129333e-06, "loss": 103.8498, "step": 88840 }, { "epoch": 0.734996070645655, "grad_norm": 528.6309814453125, "learning_rate": 2.233335941835787e-06, "loss": 77.8374, "step": 88850 }, { "epoch": 0.7350787938950242, "grad_norm": 901.2240600585938, "learning_rate": 2.232161272189401e-06, "loss": 81.7305, "step": 88860 }, { "epoch": 0.7351615171443935, "grad_norm": 1238.568359375, "learning_rate": 2.230986822767241e-06, "loss": 78.068, "step": 88870 }, { "epoch": 0.7352442403937627, "grad_norm": 1001.0896606445312, "learning_rate": 2.2298125936627517e-06, "loss": 70.9309, "step": 88880 }, { "epoch": 0.7353269636431319, "grad_norm": 1090.4779052734375, "learning_rate": 2.228638584969363e-06, "loss": 85.4851, "step": 88890 }, { "epoch": 0.7354096868925012, "grad_norm": 472.36602783203125, "learning_rate": 2.227464796780481e-06, "loss": 83.7774, "step": 88900 }, { "epoch": 0.7354924101418704, "grad_norm": 890.2085571289062, "learning_rate": 2.226291229189501e-06, "loss": 103.158, "step": 88910 }, { "epoch": 0.7355751333912396, "grad_norm": 735.7196044921875, "learning_rate": 2.225117882289799e-06, "loss": 105.7497, "step": 88920 }, { "epoch": 0.7356578566406089, "grad_norm": 647.6386108398438, "learning_rate": 2.223944756174731e-06, "loss": 79.5156, "step": 88930 }, { "epoch": 0.7357405798899781, "grad_norm": 1861.40869140625, "learning_rate": 2.2227718509376395e-06, "loss": 133.0161, "step": 88940 }, { "epoch": 0.7358233031393473, "grad_norm": 916.1025390625, "learning_rate": 2.221599166671845e-06, "loss": 104.4202, "step": 88950 }, { "epoch": 0.7359060263887165, "grad_norm": 1034.66015625, "learning_rate": 2.220426703470653e-06, "loss": 90.1667, "step": 88960 }, { "epoch": 0.7359887496380858, "grad_norm": 748.0284423828125, "learning_rate": 2.2192544614273526e-06, "loss": 58.7081, "step": 88970 }, { "epoch": 0.736071472887455, "grad_norm": 1415.480224609375, "learning_rate": 2.218082440635215e-06, "loss": 77.1677, "step": 88980 }, { "epoch": 0.7361541961368242, "grad_norm": 431.0531921386719, "learning_rate": 2.216910641187488e-06, "loss": 132.108, "step": 88990 }, { "epoch": 0.7362369193861935, "grad_norm": 609.1949462890625, "learning_rate": 2.215739063177409e-06, "loss": 85.791, "step": 89000 }, { "epoch": 0.7363196426355627, "grad_norm": 3827.787841796875, "learning_rate": 2.2145677066981948e-06, "loss": 111.6699, "step": 89010 }, { "epoch": 0.7364023658849319, "grad_norm": 393.4620361328125, "learning_rate": 2.213396571843045e-06, "loss": 80.0387, "step": 89020 }, { "epoch": 0.7364850891343012, "grad_norm": 636.8660278320312, "learning_rate": 2.2122256587051404e-06, "loss": 74.4118, "step": 89030 }, { "epoch": 0.7365678123836704, "grad_norm": 783.8397216796875, "learning_rate": 2.211054967377647e-06, "loss": 88.0103, "step": 89040 }, { "epoch": 0.7366505356330396, "grad_norm": 449.6473388671875, "learning_rate": 2.2098844979537093e-06, "loss": 105.3095, "step": 89050 }, { "epoch": 0.7367332588824089, "grad_norm": 1187.8668212890625, "learning_rate": 2.208714250526456e-06, "loss": 82.4545, "step": 89060 }, { "epoch": 0.7368159821317781, "grad_norm": 714.7520751953125, "learning_rate": 2.207544225189003e-06, "loss": 105.3385, "step": 89070 }, { "epoch": 0.7368987053811473, "grad_norm": 1098.642578125, "learning_rate": 2.2063744220344386e-06, "loss": 87.2493, "step": 89080 }, { "epoch": 0.7369814286305166, "grad_norm": 833.3667602539062, "learning_rate": 2.2052048411558403e-06, "loss": 89.0781, "step": 89090 }, { "epoch": 0.7370641518798858, "grad_norm": 1110.5281982421875, "learning_rate": 2.204035482646267e-06, "loss": 79.2814, "step": 89100 }, { "epoch": 0.737146875129255, "grad_norm": 981.9432983398438, "learning_rate": 2.2028663465987576e-06, "loss": 73.8698, "step": 89110 }, { "epoch": 0.7372295983786243, "grad_norm": 894.4478149414062, "learning_rate": 2.201697433106336e-06, "loss": 101.7881, "step": 89120 }, { "epoch": 0.7373123216279935, "grad_norm": 516.5750732421875, "learning_rate": 2.2005287422620083e-06, "loss": 82.2681, "step": 89130 }, { "epoch": 0.7373950448773627, "grad_norm": 549.2446899414062, "learning_rate": 2.19936027415876e-06, "loss": 72.0674, "step": 89140 }, { "epoch": 0.737477768126732, "grad_norm": 639.7827758789062, "learning_rate": 2.1981920288895615e-06, "loss": 86.3662, "step": 89150 }, { "epoch": 0.7375604913761012, "grad_norm": 978.5205078125, "learning_rate": 2.197024006547364e-06, "loss": 94.2316, "step": 89160 }, { "epoch": 0.7376432146254704, "grad_norm": 625.5220947265625, "learning_rate": 2.1958562072251033e-06, "loss": 97.0904, "step": 89170 }, { "epoch": 0.7377259378748398, "grad_norm": 1031.7874755859375, "learning_rate": 2.1946886310156945e-06, "loss": 97.9485, "step": 89180 }, { "epoch": 0.737808661124209, "grad_norm": 531.7047729492188, "learning_rate": 2.193521278012037e-06, "loss": 99.0671, "step": 89190 }, { "epoch": 0.7378913843735782, "grad_norm": 391.3578796386719, "learning_rate": 2.1923541483070114e-06, "loss": 91.6598, "step": 89200 }, { "epoch": 0.7379741076229475, "grad_norm": 606.8892211914062, "learning_rate": 2.1911872419934804e-06, "loss": 65.9362, "step": 89210 }, { "epoch": 0.7380568308723167, "grad_norm": 890.9434814453125, "learning_rate": 2.1900205591642904e-06, "loss": 82.1377, "step": 89220 }, { "epoch": 0.7381395541216859, "grad_norm": 1102.2276611328125, "learning_rate": 2.188854099912268e-06, "loss": 81.4802, "step": 89230 }, { "epoch": 0.7382222773710552, "grad_norm": 614.1528930664062, "learning_rate": 2.187687864330224e-06, "loss": 88.8945, "step": 89240 }, { "epoch": 0.7383050006204244, "grad_norm": 611.352294921875, "learning_rate": 2.1865218525109496e-06, "loss": 78.2255, "step": 89250 }, { "epoch": 0.7383877238697936, "grad_norm": 934.4541015625, "learning_rate": 2.185356064547219e-06, "loss": 87.7939, "step": 89260 }, { "epoch": 0.7384704471191629, "grad_norm": 1047.531494140625, "learning_rate": 2.1841905005317893e-06, "loss": 76.6948, "step": 89270 }, { "epoch": 0.7385531703685321, "grad_norm": 455.1351623535156, "learning_rate": 2.1830251605573984e-06, "loss": 57.0161, "step": 89280 }, { "epoch": 0.7386358936179013, "grad_norm": 902.6798095703125, "learning_rate": 2.181860044716767e-06, "loss": 77.3232, "step": 89290 }, { "epoch": 0.7387186168672706, "grad_norm": 1158.1624755859375, "learning_rate": 2.180695153102599e-06, "loss": 94.8786, "step": 89300 }, { "epoch": 0.7388013401166398, "grad_norm": 644.2076416015625, "learning_rate": 2.179530485807578e-06, "loss": 104.4438, "step": 89310 }, { "epoch": 0.738884063366009, "grad_norm": 919.8838500976562, "learning_rate": 2.1783660429243747e-06, "loss": 127.025, "step": 89320 }, { "epoch": 0.7389667866153783, "grad_norm": 1036.4383544921875, "learning_rate": 2.177201824545632e-06, "loss": 100.3359, "step": 89330 }, { "epoch": 0.7390495098647475, "grad_norm": 775.053955078125, "learning_rate": 2.1760378307639867e-06, "loss": 101.0328, "step": 89340 }, { "epoch": 0.7391322331141167, "grad_norm": 891.0201416015625, "learning_rate": 2.174874061672051e-06, "loss": 80.9924, "step": 89350 }, { "epoch": 0.739214956363486, "grad_norm": 919.8565063476562, "learning_rate": 2.173710517362421e-06, "loss": 103.8065, "step": 89360 }, { "epoch": 0.7392976796128552, "grad_norm": 565.3506469726562, "learning_rate": 2.1725471979276734e-06, "loss": 72.2678, "step": 89370 }, { "epoch": 0.7393804028622244, "grad_norm": 666.4319458007812, "learning_rate": 2.17138410346037e-06, "loss": 75.9782, "step": 89380 }, { "epoch": 0.7394631261115937, "grad_norm": 745.9927368164062, "learning_rate": 2.1702212340530515e-06, "loss": 111.4429, "step": 89390 }, { "epoch": 0.7395458493609629, "grad_norm": 968.5650024414062, "learning_rate": 2.1690585897982423e-06, "loss": 100.4713, "step": 89400 }, { "epoch": 0.7396285726103321, "grad_norm": 831.1185302734375, "learning_rate": 2.167896170788451e-06, "loss": 62.0955, "step": 89410 }, { "epoch": 0.7397112958597014, "grad_norm": 1192.0394287109375, "learning_rate": 2.16673397711616e-06, "loss": 110.7184, "step": 89420 }, { "epoch": 0.7397940191090706, "grad_norm": 985.3240966796875, "learning_rate": 2.165572008873845e-06, "loss": 95.5798, "step": 89430 }, { "epoch": 0.7398767423584398, "grad_norm": 285.2931213378906, "learning_rate": 2.1644102661539573e-06, "loss": 76.2906, "step": 89440 }, { "epoch": 0.7399594656078091, "grad_norm": 1738.4462890625, "learning_rate": 2.1632487490489314e-06, "loss": 90.5874, "step": 89450 }, { "epoch": 0.7400421888571783, "grad_norm": 746.9173583984375, "learning_rate": 2.162087457651183e-06, "loss": 92.9749, "step": 89460 }, { "epoch": 0.7401249121065475, "grad_norm": 530.8255004882812, "learning_rate": 2.1609263920531115e-06, "loss": 77.7234, "step": 89470 }, { "epoch": 0.7402076353559168, "grad_norm": 0.0, "learning_rate": 2.159765552347098e-06, "loss": 63.3263, "step": 89480 }, { "epoch": 0.740290358605286, "grad_norm": 911.8055419921875, "learning_rate": 2.1586049386255036e-06, "loss": 127.8942, "step": 89490 }, { "epoch": 0.7403730818546552, "grad_norm": 723.2269897460938, "learning_rate": 2.1574445509806764e-06, "loss": 89.7021, "step": 89500 }, { "epoch": 0.7404558051040245, "grad_norm": 584.854736328125, "learning_rate": 2.156284389504939e-06, "loss": 96.6398, "step": 89510 }, { "epoch": 0.7405385283533937, "grad_norm": 399.6296691894531, "learning_rate": 2.1551244542905995e-06, "loss": 63.739, "step": 89520 }, { "epoch": 0.7406212516027629, "grad_norm": 729.1290893554688, "learning_rate": 2.1539647454299535e-06, "loss": 64.4943, "step": 89530 }, { "epoch": 0.7407039748521322, "grad_norm": 1142.3839111328125, "learning_rate": 2.152805263015271e-06, "loss": 98.2541, "step": 89540 }, { "epoch": 0.7407866981015014, "grad_norm": 761.3294067382812, "learning_rate": 2.1516460071388062e-06, "loss": 78.2914, "step": 89550 }, { "epoch": 0.7408694213508706, "grad_norm": 777.5975341796875, "learning_rate": 2.1504869778927965e-06, "loss": 86.9927, "step": 89560 }, { "epoch": 0.74095214460024, "grad_norm": 767.6286010742188, "learning_rate": 2.149328175369461e-06, "loss": 74.6266, "step": 89570 }, { "epoch": 0.7410348678496091, "grad_norm": 1057.6436767578125, "learning_rate": 2.148169599661001e-06, "loss": 79.5687, "step": 89580 }, { "epoch": 0.7411175910989783, "grad_norm": 512.66015625, "learning_rate": 2.147011250859597e-06, "loss": 92.7999, "step": 89590 }, { "epoch": 0.7412003143483477, "grad_norm": 878.7671508789062, "learning_rate": 2.1458531290574138e-06, "loss": 114.662, "step": 89600 }, { "epoch": 0.7412830375977169, "grad_norm": 781.1104125976562, "learning_rate": 2.144695234346598e-06, "loss": 89.1826, "step": 89610 }, { "epoch": 0.741365760847086, "grad_norm": 868.6851196289062, "learning_rate": 2.143537566819277e-06, "loss": 94.632, "step": 89620 }, { "epoch": 0.7414484840964554, "grad_norm": 1113.6846923828125, "learning_rate": 2.1423801265675643e-06, "loss": 114.5532, "step": 89630 }, { "epoch": 0.7415312073458246, "grad_norm": 987.2115478515625, "learning_rate": 2.14122291368355e-06, "loss": 101.5493, "step": 89640 }, { "epoch": 0.7416139305951938, "grad_norm": 961.3772583007812, "learning_rate": 2.1400659282593083e-06, "loss": 84.6896, "step": 89650 }, { "epoch": 0.7416966538445631, "grad_norm": 910.09033203125, "learning_rate": 2.1389091703868954e-06, "loss": 99.8653, "step": 89660 }, { "epoch": 0.7417793770939323, "grad_norm": 1119.726318359375, "learning_rate": 2.137752640158351e-06, "loss": 99.0549, "step": 89670 }, { "epoch": 0.7418621003433015, "grad_norm": 716.283447265625, "learning_rate": 2.136596337665691e-06, "loss": 94.7824, "step": 89680 }, { "epoch": 0.7419448235926707, "grad_norm": 1223.6080322265625, "learning_rate": 2.13544026300092e-06, "loss": 125.8443, "step": 89690 }, { "epoch": 0.74202754684204, "grad_norm": 1124.6939697265625, "learning_rate": 2.13428441625602e-06, "loss": 81.6936, "step": 89700 }, { "epoch": 0.7421102700914092, "grad_norm": 617.048095703125, "learning_rate": 2.1331287975229574e-06, "loss": 88.8294, "step": 89710 }, { "epoch": 0.7421929933407784, "grad_norm": 373.8196105957031, "learning_rate": 2.131973406893677e-06, "loss": 72.712, "step": 89720 }, { "epoch": 0.7422757165901477, "grad_norm": 483.8746643066406, "learning_rate": 2.1308182444601126e-06, "loss": 92.3466, "step": 89730 }, { "epoch": 0.7423584398395169, "grad_norm": 708.9064331054688, "learning_rate": 2.1296633103141724e-06, "loss": 105.1006, "step": 89740 }, { "epoch": 0.7424411630888861, "grad_norm": 1248.171142578125, "learning_rate": 2.1285086045477515e-06, "loss": 82.8471, "step": 89750 }, { "epoch": 0.7425238863382554, "grad_norm": 773.7487182617188, "learning_rate": 2.12735412725272e-06, "loss": 86.4886, "step": 89760 }, { "epoch": 0.7426066095876246, "grad_norm": 810.4151000976562, "learning_rate": 2.1261998785209382e-06, "loss": 83.5308, "step": 89770 }, { "epoch": 0.7426893328369938, "grad_norm": 606.5125732421875, "learning_rate": 2.125045858444242e-06, "loss": 60.5102, "step": 89780 }, { "epoch": 0.7427720560863631, "grad_norm": 420.3428955078125, "learning_rate": 2.1238920671144534e-06, "loss": 87.7662, "step": 89790 }, { "epoch": 0.7428547793357323, "grad_norm": 703.5516357421875, "learning_rate": 2.122738504623373e-06, "loss": 106.8324, "step": 89800 }, { "epoch": 0.7429375025851015, "grad_norm": 1249.0703125, "learning_rate": 2.121585171062785e-06, "loss": 86.0322, "step": 89810 }, { "epoch": 0.7430202258344708, "grad_norm": 1007.9696655273438, "learning_rate": 2.1204320665244533e-06, "loss": 83.4045, "step": 89820 }, { "epoch": 0.74310294908384, "grad_norm": 926.9172973632812, "learning_rate": 2.119279191100128e-06, "loss": 96.8335, "step": 89830 }, { "epoch": 0.7431856723332092, "grad_norm": 743.4456176757812, "learning_rate": 2.1181265448815388e-06, "loss": 71.0544, "step": 89840 }, { "epoch": 0.7432683955825785, "grad_norm": 1535.4232177734375, "learning_rate": 2.1169741279603927e-06, "loss": 123.924, "step": 89850 }, { "epoch": 0.7433511188319477, "grad_norm": 1149.1324462890625, "learning_rate": 2.1158219404283836e-06, "loss": 84.1763, "step": 89860 }, { "epoch": 0.7434338420813169, "grad_norm": 632.4330444335938, "learning_rate": 2.1146699823771867e-06, "loss": 66.9591, "step": 89870 }, { "epoch": 0.7435165653306862, "grad_norm": 1532.528076171875, "learning_rate": 2.1135182538984565e-06, "loss": 107.8088, "step": 89880 }, { "epoch": 0.7435992885800554, "grad_norm": 1111.4703369140625, "learning_rate": 2.1123667550838322e-06, "loss": 78.3602, "step": 89890 }, { "epoch": 0.7436820118294246, "grad_norm": 775.93994140625, "learning_rate": 2.1112154860249327e-06, "loss": 82.3048, "step": 89900 }, { "epoch": 0.7437647350787939, "grad_norm": 840.6805419921875, "learning_rate": 2.1100644468133574e-06, "loss": 63.061, "step": 89910 }, { "epoch": 0.7438474583281631, "grad_norm": 707.5786743164062, "learning_rate": 2.1089136375406934e-06, "loss": 85.1319, "step": 89920 }, { "epoch": 0.7439301815775323, "grad_norm": 895.3738403320312, "learning_rate": 2.107763058298504e-06, "loss": 109.1324, "step": 89930 }, { "epoch": 0.7440129048269016, "grad_norm": 349.6246643066406, "learning_rate": 2.106612709178333e-06, "loss": 71.1513, "step": 89940 }, { "epoch": 0.7440956280762708, "grad_norm": 1578.1759033203125, "learning_rate": 2.10546259027171e-06, "loss": 89.1613, "step": 89950 }, { "epoch": 0.74417835132564, "grad_norm": 891.1797485351562, "learning_rate": 2.1043127016701442e-06, "loss": 84.265, "step": 89960 }, { "epoch": 0.7442610745750093, "grad_norm": 481.6813659667969, "learning_rate": 2.1031630434651277e-06, "loss": 81.8503, "step": 89970 }, { "epoch": 0.7443437978243785, "grad_norm": 1180.74169921875, "learning_rate": 2.102013615748133e-06, "loss": 103.7244, "step": 89980 }, { "epoch": 0.7444265210737477, "grad_norm": 867.1542358398438, "learning_rate": 2.1008644186106146e-06, "loss": 77.129, "step": 89990 }, { "epoch": 0.744509244323117, "grad_norm": 0.0, "learning_rate": 2.09971545214401e-06, "loss": 64.4092, "step": 90000 }, { "epoch": 0.7445919675724862, "grad_norm": 453.9908142089844, "learning_rate": 2.0985667164397355e-06, "loss": 56.7564, "step": 90010 }, { "epoch": 0.7446746908218554, "grad_norm": 363.618896484375, "learning_rate": 2.0974182115891924e-06, "loss": 91.9298, "step": 90020 }, { "epoch": 0.7447574140712248, "grad_norm": 873.480224609375, "learning_rate": 2.0962699376837604e-06, "loss": 74.911, "step": 90030 }, { "epoch": 0.744840137320594, "grad_norm": 507.69085693359375, "learning_rate": 2.0951218948148034e-06, "loss": 99.115, "step": 90040 }, { "epoch": 0.7449228605699632, "grad_norm": 1094.7142333984375, "learning_rate": 2.093974083073666e-06, "loss": 78.3589, "step": 90050 }, { "epoch": 0.7450055838193325, "grad_norm": 914.5409545898438, "learning_rate": 2.0928265025516737e-06, "loss": 77.5211, "step": 90060 }, { "epoch": 0.7450883070687017, "grad_norm": 433.937744140625, "learning_rate": 2.0916791533401344e-06, "loss": 117.6988, "step": 90070 }, { "epoch": 0.7451710303180709, "grad_norm": 891.2142944335938, "learning_rate": 2.090532035530337e-06, "loss": 70.6285, "step": 90080 }, { "epoch": 0.7452537535674402, "grad_norm": 684.631103515625, "learning_rate": 2.0893851492135536e-06, "loss": 66.5324, "step": 90090 }, { "epoch": 0.7453364768168094, "grad_norm": 881.5846557617188, "learning_rate": 2.0882384944810358e-06, "loss": 112.9255, "step": 90100 }, { "epoch": 0.7454192000661786, "grad_norm": 773.873291015625, "learning_rate": 2.087092071424017e-06, "loss": 105.3122, "step": 90110 }, { "epoch": 0.7455019233155479, "grad_norm": 1046.751708984375, "learning_rate": 2.085945880133715e-06, "loss": 73.7329, "step": 90120 }, { "epoch": 0.7455846465649171, "grad_norm": 880.5443725585938, "learning_rate": 2.0847999207013247e-06, "loss": 77.2783, "step": 90130 }, { "epoch": 0.7456673698142863, "grad_norm": 625.60107421875, "learning_rate": 2.083654193218026e-06, "loss": 91.1625, "step": 90140 }, { "epoch": 0.7457500930636556, "grad_norm": 1652.64697265625, "learning_rate": 2.0825086977749793e-06, "loss": 76.7295, "step": 90150 }, { "epoch": 0.7458328163130248, "grad_norm": 902.3091430664062, "learning_rate": 2.0813634344633256e-06, "loss": 83.2874, "step": 90160 }, { "epoch": 0.745915539562394, "grad_norm": 697.8639526367188, "learning_rate": 2.0802184033741886e-06, "loss": 94.8818, "step": 90170 }, { "epoch": 0.7459982628117633, "grad_norm": 1140.19287109375, "learning_rate": 2.0790736045986737e-06, "loss": 84.9298, "step": 90180 }, { "epoch": 0.7460809860611325, "grad_norm": 509.9717712402344, "learning_rate": 2.077929038227867e-06, "loss": 78.4466, "step": 90190 }, { "epoch": 0.7461637093105017, "grad_norm": 620.2896118164062, "learning_rate": 2.076784704352835e-06, "loss": 67.6305, "step": 90200 }, { "epoch": 0.746246432559871, "grad_norm": 776.4671020507812, "learning_rate": 2.075640603064629e-06, "loss": 123.8405, "step": 90210 }, { "epoch": 0.7463291558092402, "grad_norm": 863.0435180664062, "learning_rate": 2.07449673445428e-06, "loss": 90.1876, "step": 90220 }, { "epoch": 0.7464118790586094, "grad_norm": 1178.6463623046875, "learning_rate": 2.0733530986127985e-06, "loss": 101.5113, "step": 90230 }, { "epoch": 0.7464946023079787, "grad_norm": 1176.33203125, "learning_rate": 2.07220969563118e-06, "loss": 90.9558, "step": 90240 }, { "epoch": 0.7465773255573479, "grad_norm": 1168.3426513671875, "learning_rate": 2.0710665256003994e-06, "loss": 117.3465, "step": 90250 }, { "epoch": 0.7466600488067171, "grad_norm": 598.4938354492188, "learning_rate": 2.069923588611413e-06, "loss": 88.4196, "step": 90260 }, { "epoch": 0.7467427720560864, "grad_norm": 908.4249877929688, "learning_rate": 2.068780884755161e-06, "loss": 71.9552, "step": 90270 }, { "epoch": 0.7468254953054556, "grad_norm": 610.5745849609375, "learning_rate": 2.0676384141225586e-06, "loss": 112.2541, "step": 90280 }, { "epoch": 0.7469082185548248, "grad_norm": 722.8043212890625, "learning_rate": 2.066496176804511e-06, "loss": 77.0322, "step": 90290 }, { "epoch": 0.7469909418041941, "grad_norm": 637.941650390625, "learning_rate": 2.0653541728919002e-06, "loss": 97.5822, "step": 90300 }, { "epoch": 0.7470736650535633, "grad_norm": 1051.8499755859375, "learning_rate": 2.0642124024755895e-06, "loss": 78.8491, "step": 90310 }, { "epoch": 0.7471563883029325, "grad_norm": 1052.2393798828125, "learning_rate": 2.0630708656464245e-06, "loss": 98.3979, "step": 90320 }, { "epoch": 0.7472391115523018, "grad_norm": 490.3679504394531, "learning_rate": 2.0619295624952318e-06, "loss": 66.2834, "step": 90330 }, { "epoch": 0.747321834801671, "grad_norm": 892.2313232421875, "learning_rate": 2.0607884931128205e-06, "loss": 76.3967, "step": 90340 }, { "epoch": 0.7474045580510402, "grad_norm": 1203.5242919921875, "learning_rate": 2.059647657589979e-06, "loss": 84.8439, "step": 90350 }, { "epoch": 0.7474872813004095, "grad_norm": 890.0416870117188, "learning_rate": 2.0585070560174807e-06, "loss": 74.8649, "step": 90360 }, { "epoch": 0.7475700045497787, "grad_norm": 819.0018310546875, "learning_rate": 2.057366688486073e-06, "loss": 88.5144, "step": 90370 }, { "epoch": 0.7476527277991479, "grad_norm": 1071.549072265625, "learning_rate": 2.056226555086495e-06, "loss": 79.6231, "step": 90380 }, { "epoch": 0.7477354510485172, "grad_norm": 949.044677734375, "learning_rate": 2.0550866559094597e-06, "loss": 107.752, "step": 90390 }, { "epoch": 0.7478181742978864, "grad_norm": 852.3134155273438, "learning_rate": 2.053946991045664e-06, "loss": 70.428, "step": 90400 }, { "epoch": 0.7479008975472556, "grad_norm": 280.0954284667969, "learning_rate": 2.0528075605857855e-06, "loss": 89.7523, "step": 90410 }, { "epoch": 0.7479836207966248, "grad_norm": 1142.2628173828125, "learning_rate": 2.0516683646204836e-06, "loss": 93.6377, "step": 90420 }, { "epoch": 0.7480663440459941, "grad_norm": 561.314453125, "learning_rate": 2.0505294032403987e-06, "loss": 98.4227, "step": 90430 }, { "epoch": 0.7481490672953633, "grad_norm": 538.2353515625, "learning_rate": 2.0493906765361556e-06, "loss": 81.2393, "step": 90440 }, { "epoch": 0.7482317905447325, "grad_norm": 1096.204345703125, "learning_rate": 2.0482521845983522e-06, "loss": 92.642, "step": 90450 }, { "epoch": 0.7483145137941019, "grad_norm": 855.7373046875, "learning_rate": 2.047113927517576e-06, "loss": 88.2958, "step": 90460 }, { "epoch": 0.748397237043471, "grad_norm": 829.8861083984375, "learning_rate": 2.0459759053843913e-06, "loss": 84.8314, "step": 90470 }, { "epoch": 0.7484799602928403, "grad_norm": 499.6122741699219, "learning_rate": 2.0448381182893485e-06, "loss": 87.0093, "step": 90480 }, { "epoch": 0.7485626835422096, "grad_norm": 1019.7319946289062, "learning_rate": 2.043700566322974e-06, "loss": 91.0243, "step": 90490 }, { "epoch": 0.7486454067915788, "grad_norm": 1175.784912109375, "learning_rate": 2.0425632495757776e-06, "loss": 85.5139, "step": 90500 }, { "epoch": 0.748728130040948, "grad_norm": 1392.78759765625, "learning_rate": 2.0414261681382507e-06, "loss": 93.0289, "step": 90510 }, { "epoch": 0.7488108532903173, "grad_norm": 986.0319213867188, "learning_rate": 2.0402893221008657e-06, "loss": 91.0319, "step": 90520 }, { "epoch": 0.7488935765396865, "grad_norm": 1051.900146484375, "learning_rate": 2.0391527115540777e-06, "loss": 103.051, "step": 90530 }, { "epoch": 0.7489762997890557, "grad_norm": 724.2633056640625, "learning_rate": 2.0380163365883188e-06, "loss": 93.0482, "step": 90540 }, { "epoch": 0.749059023038425, "grad_norm": 1194.2574462890625, "learning_rate": 2.0368801972940055e-06, "loss": 113.0047, "step": 90550 }, { "epoch": 0.7491417462877942, "grad_norm": 970.729736328125, "learning_rate": 2.0357442937615367e-06, "loss": 93.03, "step": 90560 }, { "epoch": 0.7492244695371634, "grad_norm": 687.0914916992188, "learning_rate": 2.034608626081288e-06, "loss": 87.3318, "step": 90570 }, { "epoch": 0.7493071927865327, "grad_norm": 732.1859130859375, "learning_rate": 2.0334731943436235e-06, "loss": 84.7605, "step": 90580 }, { "epoch": 0.7493899160359019, "grad_norm": 1996.309814453125, "learning_rate": 2.032337998638883e-06, "loss": 117.3526, "step": 90590 }, { "epoch": 0.7494726392852711, "grad_norm": 1726.9215087890625, "learning_rate": 2.031203039057388e-06, "loss": 91.3886, "step": 90600 }, { "epoch": 0.7495553625346404, "grad_norm": 1001.2156372070312, "learning_rate": 2.0300683156894435e-06, "loss": 94.4224, "step": 90610 }, { "epoch": 0.7496380857840096, "grad_norm": 1375.769775390625, "learning_rate": 2.028933828625332e-06, "loss": 69.2318, "step": 90620 }, { "epoch": 0.7497208090333788, "grad_norm": 1265.17919921875, "learning_rate": 2.0277995779553193e-06, "loss": 77.7545, "step": 90630 }, { "epoch": 0.7498035322827481, "grad_norm": 631.8235473632812, "learning_rate": 2.026665563769655e-06, "loss": 100.6061, "step": 90640 }, { "epoch": 0.7498862555321173, "grad_norm": 845.4620971679688, "learning_rate": 2.025531786158565e-06, "loss": 123.6531, "step": 90650 }, { "epoch": 0.7499689787814865, "grad_norm": 950.5982055664062, "learning_rate": 2.02439824521226e-06, "loss": 89.5225, "step": 90660 }, { "epoch": 0.7500517020308558, "grad_norm": 1280.9688720703125, "learning_rate": 2.023264941020929e-06, "loss": 139.323, "step": 90670 }, { "epoch": 0.750134425280225, "grad_norm": 989.0846557617188, "learning_rate": 2.022131873674747e-06, "loss": 88.5975, "step": 90680 }, { "epoch": 0.7502171485295942, "grad_norm": 905.4840087890625, "learning_rate": 2.020999043263865e-06, "loss": 87.5264, "step": 90690 }, { "epoch": 0.7502998717789635, "grad_norm": 810.5401611328125, "learning_rate": 2.0198664498784194e-06, "loss": 93.4054, "step": 90700 }, { "epoch": 0.7503825950283327, "grad_norm": 836.0730590820312, "learning_rate": 2.018734093608521e-06, "loss": 88.2675, "step": 90710 }, { "epoch": 0.7504653182777019, "grad_norm": 629.2024536132812, "learning_rate": 2.017601974544269e-06, "loss": 64.2829, "step": 90720 }, { "epoch": 0.7505480415270712, "grad_norm": 597.7532348632812, "learning_rate": 2.0164700927757407e-06, "loss": 80.799, "step": 90730 }, { "epoch": 0.7506307647764404, "grad_norm": 818.9828491210938, "learning_rate": 2.0153384483929946e-06, "loss": 89.5458, "step": 90740 }, { "epoch": 0.7507134880258096, "grad_norm": 1135.253662109375, "learning_rate": 2.0142070414860704e-06, "loss": 113.3995, "step": 90750 }, { "epoch": 0.7507962112751789, "grad_norm": 1244.027587890625, "learning_rate": 2.0130758721449887e-06, "loss": 104.453, "step": 90760 }, { "epoch": 0.7508789345245481, "grad_norm": 731.18505859375, "learning_rate": 2.01194494045975e-06, "loss": 71.3096, "step": 90770 }, { "epoch": 0.7509616577739173, "grad_norm": 613.3550415039062, "learning_rate": 2.0108142465203413e-06, "loss": 84.1263, "step": 90780 }, { "epoch": 0.7510443810232866, "grad_norm": 1097.283935546875, "learning_rate": 2.0096837904167252e-06, "loss": 87.5751, "step": 90790 }, { "epoch": 0.7511271042726558, "grad_norm": 1566.380615234375, "learning_rate": 2.0085535722388454e-06, "loss": 91.1146, "step": 90800 }, { "epoch": 0.751209827522025, "grad_norm": 623.9690551757812, "learning_rate": 2.007423592076629e-06, "loss": 108.3401, "step": 90810 }, { "epoch": 0.7512925507713943, "grad_norm": 744.1317749023438, "learning_rate": 2.006293850019983e-06, "loss": 98.0858, "step": 90820 }, { "epoch": 0.7513752740207635, "grad_norm": 626.7324829101562, "learning_rate": 2.005164346158796e-06, "loss": 86.4027, "step": 90830 }, { "epoch": 0.7514579972701327, "grad_norm": 633.4637451171875, "learning_rate": 2.004035080582938e-06, "loss": 84.082, "step": 90840 }, { "epoch": 0.751540720519502, "grad_norm": 663.2689819335938, "learning_rate": 2.002906053382258e-06, "loss": 68.8682, "step": 90850 }, { "epoch": 0.7516234437688712, "grad_norm": 998.2990112304688, "learning_rate": 2.001777264646588e-06, "loss": 89.2042, "step": 90860 }, { "epoch": 0.7517061670182404, "grad_norm": 1075.44384765625, "learning_rate": 2.000648714465744e-06, "loss": 72.4316, "step": 90870 }, { "epoch": 0.7517888902676098, "grad_norm": 646.1034545898438, "learning_rate": 1.9995204029295147e-06, "loss": 84.8476, "step": 90880 }, { "epoch": 0.751871613516979, "grad_norm": 634.7570190429688, "learning_rate": 1.9983923301276764e-06, "loss": 76.6496, "step": 90890 }, { "epoch": 0.7519543367663482, "grad_norm": 586.5011596679688, "learning_rate": 1.9972644961499853e-06, "loss": 68.339, "step": 90900 }, { "epoch": 0.7520370600157175, "grad_norm": 1039.88037109375, "learning_rate": 1.9961369010861777e-06, "loss": 81.439, "step": 90910 }, { "epoch": 0.7521197832650867, "grad_norm": 638.9922485351562, "learning_rate": 1.995009545025971e-06, "loss": 94.2341, "step": 90920 }, { "epoch": 0.7522025065144559, "grad_norm": 710.9326171875, "learning_rate": 1.9938824280590635e-06, "loss": 81.596, "step": 90930 }, { "epoch": 0.7522852297638252, "grad_norm": 1036.0745849609375, "learning_rate": 1.992755550275135e-06, "loss": 91.3788, "step": 90940 }, { "epoch": 0.7523679530131944, "grad_norm": 930.1054077148438, "learning_rate": 1.991628911763846e-06, "loss": 102.8395, "step": 90950 }, { "epoch": 0.7524506762625636, "grad_norm": 1212.5616455078125, "learning_rate": 1.990502512614838e-06, "loss": 120.0019, "step": 90960 }, { "epoch": 0.7525333995119329, "grad_norm": 708.0130004882812, "learning_rate": 1.989376352917733e-06, "loss": 83.0712, "step": 90970 }, { "epoch": 0.7526161227613021, "grad_norm": 1019.6633911132812, "learning_rate": 1.988250432762135e-06, "loss": 73.8178, "step": 90980 }, { "epoch": 0.7526988460106713, "grad_norm": 602.7006225585938, "learning_rate": 1.987124752237628e-06, "loss": 84.8277, "step": 90990 }, { "epoch": 0.7527815692600406, "grad_norm": 908.8017578125, "learning_rate": 1.9859993114337773e-06, "loss": 100.6673, "step": 91000 }, { "epoch": 0.7528642925094098, "grad_norm": 3168.759765625, "learning_rate": 1.984874110440129e-06, "loss": 106.8103, "step": 91010 }, { "epoch": 0.752947015758779, "grad_norm": 741.0905151367188, "learning_rate": 1.9837491493462104e-06, "loss": 112.0769, "step": 91020 }, { "epoch": 0.7530297390081483, "grad_norm": 875.8446044921875, "learning_rate": 1.9826244282415285e-06, "loss": 77.1232, "step": 91030 }, { "epoch": 0.7531124622575175, "grad_norm": 884.2897338867188, "learning_rate": 1.9814999472155736e-06, "loss": 76.4609, "step": 91040 }, { "epoch": 0.7531951855068867, "grad_norm": 476.9480895996094, "learning_rate": 1.9803757063578146e-06, "loss": 84.1212, "step": 91050 }, { "epoch": 0.753277908756256, "grad_norm": 548.7337646484375, "learning_rate": 1.9792517057577026e-06, "loss": 67.7463, "step": 91060 }, { "epoch": 0.7533606320056252, "grad_norm": 1195.18505859375, "learning_rate": 1.978127945504669e-06, "loss": 96.1024, "step": 91070 }, { "epoch": 0.7534433552549944, "grad_norm": 1155.169677734375, "learning_rate": 1.977004425688126e-06, "loss": 95.7761, "step": 91080 }, { "epoch": 0.7535260785043637, "grad_norm": 591.861572265625, "learning_rate": 1.9758811463974677e-06, "loss": 64.2986, "step": 91090 }, { "epoch": 0.7536088017537329, "grad_norm": 794.584228515625, "learning_rate": 1.9747581077220675e-06, "loss": 79.2889, "step": 91100 }, { "epoch": 0.7536915250031021, "grad_norm": 891.2854614257812, "learning_rate": 1.9736353097512802e-06, "loss": 80.2451, "step": 91110 }, { "epoch": 0.7537742482524713, "grad_norm": 474.7909851074219, "learning_rate": 1.9725127525744423e-06, "loss": 114.1455, "step": 91120 }, { "epoch": 0.7538569715018406, "grad_norm": 843.3524169921875, "learning_rate": 1.971390436280871e-06, "loss": 97.2716, "step": 91130 }, { "epoch": 0.7539396947512098, "grad_norm": 1710.1583251953125, "learning_rate": 1.970268360959863e-06, "loss": 77.38, "step": 91140 }, { "epoch": 0.754022418000579, "grad_norm": 711.1730346679688, "learning_rate": 1.9691465267006965e-06, "loss": 80.5333, "step": 91150 }, { "epoch": 0.7541051412499483, "grad_norm": 941.439697265625, "learning_rate": 1.9680249335926314e-06, "loss": 66.0788, "step": 91160 }, { "epoch": 0.7541878644993175, "grad_norm": 922.4462890625, "learning_rate": 1.9669035817249077e-06, "loss": 88.3642, "step": 91170 }, { "epoch": 0.7542705877486867, "grad_norm": 713.2926025390625, "learning_rate": 1.9657824711867457e-06, "loss": 68.5765, "step": 91180 }, { "epoch": 0.754353310998056, "grad_norm": 617.7564697265625, "learning_rate": 1.9646616020673474e-06, "loss": 73.7422, "step": 91190 }, { "epoch": 0.7544360342474252, "grad_norm": 1247.7928466796875, "learning_rate": 1.9635409744558953e-06, "loss": 80.8417, "step": 91200 }, { "epoch": 0.7545187574967944, "grad_norm": 888.484375, "learning_rate": 1.962420588441552e-06, "loss": 80.5122, "step": 91210 }, { "epoch": 0.7546014807461637, "grad_norm": 944.42626953125, "learning_rate": 1.9613004441134635e-06, "loss": 69.0574, "step": 91220 }, { "epoch": 0.7546842039955329, "grad_norm": 731.5653076171875, "learning_rate": 1.96018054156075e-06, "loss": 108.5219, "step": 91230 }, { "epoch": 0.7547669272449021, "grad_norm": 0.0, "learning_rate": 1.9590608808725214e-06, "loss": 79.5218, "step": 91240 }, { "epoch": 0.7548496504942714, "grad_norm": 578.6743774414062, "learning_rate": 1.9579414621378624e-06, "loss": 90.3885, "step": 91250 }, { "epoch": 0.7549323737436406, "grad_norm": 1231.1759033203125, "learning_rate": 1.9568222854458403e-06, "loss": 74.3492, "step": 91260 }, { "epoch": 0.7550150969930098, "grad_norm": 1043.411376953125, "learning_rate": 1.955703350885502e-06, "loss": 93.9092, "step": 91270 }, { "epoch": 0.7550978202423791, "grad_norm": 570.8914794921875, "learning_rate": 1.954584658545877e-06, "loss": 78.577, "step": 91280 }, { "epoch": 0.7551805434917483, "grad_norm": 1726.863525390625, "learning_rate": 1.9534662085159746e-06, "loss": 119.8196, "step": 91290 }, { "epoch": 0.7552632667411175, "grad_norm": 722.1109619140625, "learning_rate": 1.9523480008847856e-06, "loss": 83.1329, "step": 91300 }, { "epoch": 0.7553459899904869, "grad_norm": 1024.208740234375, "learning_rate": 1.9512300357412778e-06, "loss": 82.1837, "step": 91310 }, { "epoch": 0.755428713239856, "grad_norm": 743.8002319335938, "learning_rate": 1.950112313174404e-06, "loss": 95.0024, "step": 91320 }, { "epoch": 0.7555114364892253, "grad_norm": 1056.7789306640625, "learning_rate": 1.9489948332730945e-06, "loss": 111.2676, "step": 91330 }, { "epoch": 0.7555941597385946, "grad_norm": 948.68994140625, "learning_rate": 1.947877596126266e-06, "loss": 74.7832, "step": 91340 }, { "epoch": 0.7556768829879638, "grad_norm": 746.3170166015625, "learning_rate": 1.946760601822809e-06, "loss": 77.3798, "step": 91350 }, { "epoch": 0.755759606237333, "grad_norm": 783.4119873046875, "learning_rate": 1.945643850451599e-06, "loss": 91.6843, "step": 91360 }, { "epoch": 0.7558423294867023, "grad_norm": 692.2002563476562, "learning_rate": 1.9445273421014903e-06, "loss": 82.1802, "step": 91370 }, { "epoch": 0.7559250527360715, "grad_norm": 1240.4085693359375, "learning_rate": 1.9434110768613184e-06, "loss": 102.5016, "step": 91380 }, { "epoch": 0.7560077759854407, "grad_norm": 1118.1903076171875, "learning_rate": 1.9422950548199004e-06, "loss": 82.6145, "step": 91390 }, { "epoch": 0.75609049923481, "grad_norm": 552.0459594726562, "learning_rate": 1.941179276066031e-06, "loss": 90.4393, "step": 91400 }, { "epoch": 0.7561732224841792, "grad_norm": 917.4848022460938, "learning_rate": 1.9400637406884875e-06, "loss": 90.3351, "step": 91410 }, { "epoch": 0.7562559457335484, "grad_norm": 276.276611328125, "learning_rate": 1.938948448776028e-06, "loss": 111.6006, "step": 91420 }, { "epoch": 0.7563386689829177, "grad_norm": 597.5177001953125, "learning_rate": 1.9378334004173936e-06, "loss": 78.6461, "step": 91430 }, { "epoch": 0.7564213922322869, "grad_norm": 742.5587768554688, "learning_rate": 1.9367185957013024e-06, "loss": 89.7729, "step": 91440 }, { "epoch": 0.7565041154816561, "grad_norm": 1106.80712890625, "learning_rate": 1.9356040347164533e-06, "loss": 76.2414, "step": 91450 }, { "epoch": 0.7565868387310254, "grad_norm": 1360.2635498046875, "learning_rate": 1.9344897175515283e-06, "loss": 100.4726, "step": 91460 }, { "epoch": 0.7566695619803946, "grad_norm": 615.8472290039062, "learning_rate": 1.9333756442951886e-06, "loss": 62.704, "step": 91470 }, { "epoch": 0.7567522852297638, "grad_norm": 864.1071166992188, "learning_rate": 1.9322618150360732e-06, "loss": 108.7585, "step": 91480 }, { "epoch": 0.7568350084791331, "grad_norm": 797.296142578125, "learning_rate": 1.931148229862807e-06, "loss": 66.7689, "step": 91490 }, { "epoch": 0.7569177317285023, "grad_norm": 944.4542846679688, "learning_rate": 1.9300348888639915e-06, "loss": 90.2655, "step": 91500 }, { "epoch": 0.7570004549778715, "grad_norm": 973.5896606445312, "learning_rate": 1.9289217921282104e-06, "loss": 81.9597, "step": 91510 }, { "epoch": 0.7570831782272408, "grad_norm": 1060.939208984375, "learning_rate": 1.927808939744027e-06, "loss": 129.8845, "step": 91520 }, { "epoch": 0.75716590147661, "grad_norm": 847.8220825195312, "learning_rate": 1.9266963317999884e-06, "loss": 77.1575, "step": 91530 }, { "epoch": 0.7572486247259792, "grad_norm": 810.7078857421875, "learning_rate": 1.9255839683846174e-06, "loss": 90.8755, "step": 91540 }, { "epoch": 0.7573313479753485, "grad_norm": 339.5457763671875, "learning_rate": 1.9244718495864206e-06, "loss": 58.7832, "step": 91550 }, { "epoch": 0.7574140712247177, "grad_norm": 1120.6597900390625, "learning_rate": 1.9233599754938857e-06, "loss": 107.0604, "step": 91560 }, { "epoch": 0.7574967944740869, "grad_norm": 1043.0989990234375, "learning_rate": 1.922248346195477e-06, "loss": 109.1071, "step": 91570 }, { "epoch": 0.7575795177234562, "grad_norm": 790.0960693359375, "learning_rate": 1.921136961779641e-06, "loss": 90.9298, "step": 91580 }, { "epoch": 0.7576622409728254, "grad_norm": 814.7366333007812, "learning_rate": 1.9200258223348072e-06, "loss": 87.8823, "step": 91590 }, { "epoch": 0.7577449642221946, "grad_norm": 446.8509826660156, "learning_rate": 1.918914927949384e-06, "loss": 77.4138, "step": 91600 }, { "epoch": 0.7578276874715639, "grad_norm": 1098.19091796875, "learning_rate": 1.9178042787117594e-06, "loss": 95.8741, "step": 91610 }, { "epoch": 0.7579104107209331, "grad_norm": 479.6676940917969, "learning_rate": 1.9166938747103013e-06, "loss": 90.7424, "step": 91620 }, { "epoch": 0.7579931339703023, "grad_norm": 925.6350708007812, "learning_rate": 1.915583716033363e-06, "loss": 101.3752, "step": 91630 }, { "epoch": 0.7580758572196716, "grad_norm": 1013.79541015625, "learning_rate": 1.9144738027692746e-06, "loss": 101.7984, "step": 91640 }, { "epoch": 0.7581585804690408, "grad_norm": 697.2052001953125, "learning_rate": 1.913364135006343e-06, "loss": 97.0362, "step": 91650 }, { "epoch": 0.75824130371841, "grad_norm": 703.0621337890625, "learning_rate": 1.9122547128328616e-06, "loss": 85.1144, "step": 91660 }, { "epoch": 0.7583240269677793, "grad_norm": 562.5040893554688, "learning_rate": 1.9111455363371016e-06, "loss": 90.2374, "step": 91670 }, { "epoch": 0.7584067502171485, "grad_norm": 906.6265869140625, "learning_rate": 1.910036605607316e-06, "loss": 72.353, "step": 91680 }, { "epoch": 0.7584894734665177, "grad_norm": 1488.5152587890625, "learning_rate": 1.908927920731736e-06, "loss": 77.0585, "step": 91690 }, { "epoch": 0.758572196715887, "grad_norm": 573.968505859375, "learning_rate": 1.9078194817985755e-06, "loss": 68.6668, "step": 91700 }, { "epoch": 0.7586549199652562, "grad_norm": 791.8029174804688, "learning_rate": 1.9067112888960283e-06, "loss": 73.1811, "step": 91710 }, { "epoch": 0.7587376432146254, "grad_norm": 743.7737426757812, "learning_rate": 1.905603342112265e-06, "loss": 89.2683, "step": 91720 }, { "epoch": 0.7588203664639948, "grad_norm": 863.6655883789062, "learning_rate": 1.904495641535446e-06, "loss": 93.7095, "step": 91730 }, { "epoch": 0.758903089713364, "grad_norm": 533.572509765625, "learning_rate": 1.9033881872537009e-06, "loss": 94.9646, "step": 91740 }, { "epoch": 0.7589858129627332, "grad_norm": 859.637939453125, "learning_rate": 1.902280979355146e-06, "loss": 75.9259, "step": 91750 }, { "epoch": 0.7590685362121025, "grad_norm": 591.394287109375, "learning_rate": 1.901174017927877e-06, "loss": 85.4813, "step": 91760 }, { "epoch": 0.7591512594614717, "grad_norm": 832.2922973632812, "learning_rate": 1.9000673030599698e-06, "loss": 91.3511, "step": 91770 }, { "epoch": 0.7592339827108409, "grad_norm": 355.1169128417969, "learning_rate": 1.89896083483948e-06, "loss": 132.036, "step": 91780 }, { "epoch": 0.7593167059602102, "grad_norm": 925.5943603515625, "learning_rate": 1.897854613354445e-06, "loss": 80.6945, "step": 91790 }, { "epoch": 0.7593994292095794, "grad_norm": 1204.190185546875, "learning_rate": 1.8967486386928819e-06, "loss": 118.005, "step": 91800 }, { "epoch": 0.7594821524589486, "grad_norm": 320.41064453125, "learning_rate": 1.8956429109427855e-06, "loss": 93.1453, "step": 91810 }, { "epoch": 0.7595648757083179, "grad_norm": 1259.0718994140625, "learning_rate": 1.8945374301921393e-06, "loss": 104.1626, "step": 91820 }, { "epoch": 0.7596475989576871, "grad_norm": 1038.4957275390625, "learning_rate": 1.893432196528896e-06, "loss": 71.0405, "step": 91830 }, { "epoch": 0.7597303222070563, "grad_norm": 818.2511596679688, "learning_rate": 1.892327210040995e-06, "loss": 86.6824, "step": 91840 }, { "epoch": 0.7598130454564255, "grad_norm": 662.8837280273438, "learning_rate": 1.8912224708163561e-06, "loss": 79.6246, "step": 91850 }, { "epoch": 0.7598957687057948, "grad_norm": 484.0934143066406, "learning_rate": 1.890117978942878e-06, "loss": 66.859, "step": 91860 }, { "epoch": 0.759978491955164, "grad_norm": 1255.5438232421875, "learning_rate": 1.8890137345084392e-06, "loss": 99.873, "step": 91870 }, { "epoch": 0.7600612152045332, "grad_norm": 563.4220581054688, "learning_rate": 1.8879097376009009e-06, "loss": 68.9941, "step": 91880 }, { "epoch": 0.7601439384539025, "grad_norm": 736.5455932617188, "learning_rate": 1.8868059883081015e-06, "loss": 69.8224, "step": 91890 }, { "epoch": 0.7602266617032717, "grad_norm": 1109.2657470703125, "learning_rate": 1.8857024867178625e-06, "loss": 69.6023, "step": 91900 }, { "epoch": 0.7603093849526409, "grad_norm": 710.873291015625, "learning_rate": 1.8845992329179835e-06, "loss": 75.4405, "step": 91910 }, { "epoch": 0.7603921082020102, "grad_norm": 739.780517578125, "learning_rate": 1.883496226996246e-06, "loss": 71.1245, "step": 91920 }, { "epoch": 0.7604748314513794, "grad_norm": 533.0693359375, "learning_rate": 1.8823934690404106e-06, "loss": 86.3577, "step": 91930 }, { "epoch": 0.7605575547007486, "grad_norm": 1472.7764892578125, "learning_rate": 1.8812909591382195e-06, "loss": 94.3954, "step": 91940 }, { "epoch": 0.7606402779501179, "grad_norm": 326.43096923828125, "learning_rate": 1.8801886973773936e-06, "loss": 101.5206, "step": 91950 }, { "epoch": 0.7607230011994871, "grad_norm": 762.7881469726562, "learning_rate": 1.8790866838456351e-06, "loss": 72.6627, "step": 91960 }, { "epoch": 0.7608057244488563, "grad_norm": 612.1148071289062, "learning_rate": 1.877984918630626e-06, "loss": 104.0263, "step": 91970 }, { "epoch": 0.7608884476982256, "grad_norm": 1866.867919921875, "learning_rate": 1.876883401820029e-06, "loss": 111.1947, "step": 91980 }, { "epoch": 0.7609711709475948, "grad_norm": 1281.453369140625, "learning_rate": 1.8757821335014858e-06, "loss": 104.3547, "step": 91990 }, { "epoch": 0.761053894196964, "grad_norm": 890.40869140625, "learning_rate": 1.8746811137626208e-06, "loss": 82.3355, "step": 92000 }, { "epoch": 0.7611366174463333, "grad_norm": 807.6094970703125, "learning_rate": 1.8735803426910366e-06, "loss": 93.4271, "step": 92010 }, { "epoch": 0.7612193406957025, "grad_norm": 970.5708618164062, "learning_rate": 1.8724798203743154e-06, "loss": 91.6451, "step": 92020 }, { "epoch": 0.7613020639450717, "grad_norm": 688.900146484375, "learning_rate": 1.8713795469000218e-06, "loss": 100.542, "step": 92030 }, { "epoch": 0.761384787194441, "grad_norm": 870.4006958007812, "learning_rate": 1.8702795223556992e-06, "loss": 88.1513, "step": 92040 }, { "epoch": 0.7614675104438102, "grad_norm": 558.2523803710938, "learning_rate": 1.8691797468288713e-06, "loss": 84.1693, "step": 92050 }, { "epoch": 0.7615502336931794, "grad_norm": 627.4422607421875, "learning_rate": 1.8680802204070432e-06, "loss": 82.1566, "step": 92060 }, { "epoch": 0.7616329569425487, "grad_norm": 792.007568359375, "learning_rate": 1.8669809431776991e-06, "loss": 103.5347, "step": 92070 }, { "epoch": 0.7617156801919179, "grad_norm": 1743.6444091796875, "learning_rate": 1.8658819152283003e-06, "loss": 108.218, "step": 92080 }, { "epoch": 0.7617984034412871, "grad_norm": 701.8472900390625, "learning_rate": 1.8647831366462948e-06, "loss": 75.0169, "step": 92090 }, { "epoch": 0.7618811266906564, "grad_norm": 555.6596069335938, "learning_rate": 1.8636846075191067e-06, "loss": 91.5314, "step": 92100 }, { "epoch": 0.7619638499400256, "grad_norm": 1169.95263671875, "learning_rate": 1.8625863279341406e-06, "loss": 90.8289, "step": 92110 }, { "epoch": 0.7620465731893948, "grad_norm": 1040.67724609375, "learning_rate": 1.8614882979787818e-06, "loss": 75.7882, "step": 92120 }, { "epoch": 0.7621292964387641, "grad_norm": 737.9378051757812, "learning_rate": 1.8603905177403953e-06, "loss": 109.6155, "step": 92130 }, { "epoch": 0.7622120196881333, "grad_norm": 1175.499267578125, "learning_rate": 1.8592929873063259e-06, "loss": 77.9201, "step": 92140 }, { "epoch": 0.7622947429375025, "grad_norm": 1110.6258544921875, "learning_rate": 1.8581957067639e-06, "loss": 110.9532, "step": 92150 }, { "epoch": 0.7623774661868719, "grad_norm": 1796.15625, "learning_rate": 1.8570986762004246e-06, "loss": 90.3984, "step": 92160 }, { "epoch": 0.762460189436241, "grad_norm": 778.6599731445312, "learning_rate": 1.8560018957031816e-06, "loss": 98.082, "step": 92170 }, { "epoch": 0.7625429126856103, "grad_norm": 2528.74560546875, "learning_rate": 1.8549053653594373e-06, "loss": 116.1548, "step": 92180 }, { "epoch": 0.7626256359349796, "grad_norm": 684.8113403320312, "learning_rate": 1.8538090852564405e-06, "loss": 72.5543, "step": 92190 }, { "epoch": 0.7627083591843488, "grad_norm": 712.4026489257812, "learning_rate": 1.852713055481416e-06, "loss": 71.634, "step": 92200 }, { "epoch": 0.762791082433718, "grad_norm": 856.0159301757812, "learning_rate": 1.8516172761215695e-06, "loss": 80.4302, "step": 92210 }, { "epoch": 0.7628738056830873, "grad_norm": 690.3129272460938, "learning_rate": 1.8505217472640868e-06, "loss": 68.3549, "step": 92220 }, { "epoch": 0.7629565289324565, "grad_norm": 787.6735229492188, "learning_rate": 1.849426468996135e-06, "loss": 87.8287, "step": 92230 }, { "epoch": 0.7630392521818257, "grad_norm": 442.70745849609375, "learning_rate": 1.8483314414048597e-06, "loss": 102.3068, "step": 92240 }, { "epoch": 0.763121975431195, "grad_norm": 812.5242919921875, "learning_rate": 1.8472366645773892e-06, "loss": 117.692, "step": 92250 }, { "epoch": 0.7632046986805642, "grad_norm": 605.50927734375, "learning_rate": 1.846142138600826e-06, "loss": 69.203, "step": 92260 }, { "epoch": 0.7632874219299334, "grad_norm": 907.07568359375, "learning_rate": 1.8450478635622592e-06, "loss": 139.9277, "step": 92270 }, { "epoch": 0.7633701451793027, "grad_norm": 1189.9276123046875, "learning_rate": 1.8439538395487528e-06, "loss": 87.1735, "step": 92280 }, { "epoch": 0.7634528684286719, "grad_norm": 575.260498046875, "learning_rate": 1.842860066647356e-06, "loss": 58.7681, "step": 92290 }, { "epoch": 0.7635355916780411, "grad_norm": 1187.8050537109375, "learning_rate": 1.841766544945095e-06, "loss": 131.3646, "step": 92300 }, { "epoch": 0.7636183149274104, "grad_norm": 811.5830078125, "learning_rate": 1.8406732745289757e-06, "loss": 83.9469, "step": 92310 }, { "epoch": 0.7637010381767796, "grad_norm": 928.5191040039062, "learning_rate": 1.839580255485985e-06, "loss": 91.3141, "step": 92320 }, { "epoch": 0.7637837614261488, "grad_norm": 1056.0318603515625, "learning_rate": 1.83848748790309e-06, "loss": 89.1773, "step": 92330 }, { "epoch": 0.7638664846755181, "grad_norm": 1123.9605712890625, "learning_rate": 1.8373949718672345e-06, "loss": 61.8712, "step": 92340 }, { "epoch": 0.7639492079248873, "grad_norm": 712.9464721679688, "learning_rate": 1.8363027074653473e-06, "loss": 84.6685, "step": 92350 }, { "epoch": 0.7640319311742565, "grad_norm": 621.0189208984375, "learning_rate": 1.835210694784334e-06, "loss": 127.7506, "step": 92360 }, { "epoch": 0.7641146544236258, "grad_norm": 916.6598510742188, "learning_rate": 1.8341189339110793e-06, "loss": 72.2045, "step": 92370 }, { "epoch": 0.764197377672995, "grad_norm": 1478.23681640625, "learning_rate": 1.8330274249324537e-06, "loss": 96.8306, "step": 92380 }, { "epoch": 0.7642801009223642, "grad_norm": 838.5194702148438, "learning_rate": 1.831936167935301e-06, "loss": 117.9247, "step": 92390 }, { "epoch": 0.7643628241717335, "grad_norm": 834.4838256835938, "learning_rate": 1.8308451630064484e-06, "loss": 98.4779, "step": 92400 }, { "epoch": 0.7644455474211027, "grad_norm": 1103.1943359375, "learning_rate": 1.8297544102327014e-06, "loss": 117.0596, "step": 92410 }, { "epoch": 0.7645282706704719, "grad_norm": 605.728515625, "learning_rate": 1.8286639097008484e-06, "loss": 87.3383, "step": 92420 }, { "epoch": 0.7646109939198412, "grad_norm": 826.8703002929688, "learning_rate": 1.827573661497652e-06, "loss": 79.3055, "step": 92430 }, { "epoch": 0.7646937171692104, "grad_norm": 797.113525390625, "learning_rate": 1.8264836657098595e-06, "loss": 94.8799, "step": 92440 }, { "epoch": 0.7647764404185796, "grad_norm": 1154.000244140625, "learning_rate": 1.8253939224241974e-06, "loss": 84.2387, "step": 92450 }, { "epoch": 0.7648591636679489, "grad_norm": 1213.9759521484375, "learning_rate": 1.8243044317273717e-06, "loss": 61.1165, "step": 92460 }, { "epoch": 0.7649418869173181, "grad_norm": 634.3380737304688, "learning_rate": 1.823215193706066e-06, "loss": 87.2058, "step": 92470 }, { "epoch": 0.7650246101666873, "grad_norm": 807.1724853515625, "learning_rate": 1.82212620844695e-06, "loss": 88.5422, "step": 92480 }, { "epoch": 0.7651073334160566, "grad_norm": 1034.462158203125, "learning_rate": 1.8210374760366662e-06, "loss": 87.1022, "step": 92490 }, { "epoch": 0.7651900566654258, "grad_norm": 922.4021606445312, "learning_rate": 1.8199489965618433e-06, "loss": 101.4626, "step": 92500 }, { "epoch": 0.765272779914795, "grad_norm": 820.8185424804688, "learning_rate": 1.8188607701090827e-06, "loss": 80.0996, "step": 92510 }, { "epoch": 0.7653555031641643, "grad_norm": 701.537841796875, "learning_rate": 1.8177727967649705e-06, "loss": 83.0882, "step": 92520 }, { "epoch": 0.7654382264135335, "grad_norm": 935.3859252929688, "learning_rate": 1.816685076616073e-06, "loss": 68.9125, "step": 92530 }, { "epoch": 0.7655209496629027, "grad_norm": 758.0145874023438, "learning_rate": 1.8155976097489342e-06, "loss": 80.4841, "step": 92540 }, { "epoch": 0.765603672912272, "grad_norm": 479.95880126953125, "learning_rate": 1.8145103962500792e-06, "loss": 108.154, "step": 92550 }, { "epoch": 0.7656863961616412, "grad_norm": 1647.0076904296875, "learning_rate": 1.8134234362060128e-06, "loss": 97.0746, "step": 92560 }, { "epoch": 0.7657691194110104, "grad_norm": 589.4857177734375, "learning_rate": 1.8123367297032175e-06, "loss": 90.745, "step": 92570 }, { "epoch": 0.7658518426603796, "grad_norm": 579.7984008789062, "learning_rate": 1.8112502768281608e-06, "loss": 82.4168, "step": 92580 }, { "epoch": 0.765934565909749, "grad_norm": 426.4167785644531, "learning_rate": 1.810164077667287e-06, "loss": 84.039, "step": 92590 }, { "epoch": 0.7660172891591182, "grad_norm": 865.5641479492188, "learning_rate": 1.809078132307016e-06, "loss": 79.1781, "step": 92600 }, { "epoch": 0.7661000124084874, "grad_norm": 1119.6689453125, "learning_rate": 1.807992440833754e-06, "loss": 72.2057, "step": 92610 }, { "epoch": 0.7661827356578567, "grad_norm": 851.24365234375, "learning_rate": 1.8069070033338842e-06, "loss": 87.8442, "step": 92620 }, { "epoch": 0.7662654589072259, "grad_norm": 1012.4996337890625, "learning_rate": 1.8058218198937695e-06, "loss": 104.0138, "step": 92630 }, { "epoch": 0.7663481821565951, "grad_norm": 2512.142578125, "learning_rate": 1.8047368905997536e-06, "loss": 90.26, "step": 92640 }, { "epoch": 0.7664309054059644, "grad_norm": 612.3758544921875, "learning_rate": 1.8036522155381592e-06, "loss": 140.536, "step": 92650 }, { "epoch": 0.7665136286553336, "grad_norm": 825.9151611328125, "learning_rate": 1.8025677947952879e-06, "loss": 106.3884, "step": 92660 }, { "epoch": 0.7665963519047028, "grad_norm": 806.4419555664062, "learning_rate": 1.8014836284574223e-06, "loss": 96.8457, "step": 92670 }, { "epoch": 0.7666790751540721, "grad_norm": 366.78570556640625, "learning_rate": 1.8003997166108278e-06, "loss": 76.3395, "step": 92680 }, { "epoch": 0.7667617984034413, "grad_norm": 1280.156982421875, "learning_rate": 1.7993160593417424e-06, "loss": 104.953, "step": 92690 }, { "epoch": 0.7668445216528105, "grad_norm": 1448.958984375, "learning_rate": 1.798232656736389e-06, "loss": 86.1547, "step": 92700 }, { "epoch": 0.7669272449021798, "grad_norm": 430.016845703125, "learning_rate": 1.7971495088809688e-06, "loss": 80.0066, "step": 92710 }, { "epoch": 0.767009968151549, "grad_norm": 611.8375244140625, "learning_rate": 1.796066615861663e-06, "loss": 82.3525, "step": 92720 }, { "epoch": 0.7670926914009182, "grad_norm": 163.59913635253906, "learning_rate": 1.7949839777646327e-06, "loss": 87.4548, "step": 92730 }, { "epoch": 0.7671754146502875, "grad_norm": 830.4721069335938, "learning_rate": 1.7939015946760186e-06, "loss": 94.6959, "step": 92740 }, { "epoch": 0.7672581378996567, "grad_norm": 602.9876098632812, "learning_rate": 1.7928194666819398e-06, "loss": 62.9848, "step": 92750 }, { "epoch": 0.7673408611490259, "grad_norm": 942.3362426757812, "learning_rate": 1.7917375938684979e-06, "loss": 89.7564, "step": 92760 }, { "epoch": 0.7674235843983952, "grad_norm": 1193.48486328125, "learning_rate": 1.7906559763217713e-06, "loss": 79.8312, "step": 92770 }, { "epoch": 0.7675063076477644, "grad_norm": 819.6219482421875, "learning_rate": 1.7895746141278198e-06, "loss": 80.9784, "step": 92780 }, { "epoch": 0.7675890308971336, "grad_norm": 716.1319580078125, "learning_rate": 1.7884935073726822e-06, "loss": 96.7796, "step": 92790 }, { "epoch": 0.7676717541465029, "grad_norm": 862.4302368164062, "learning_rate": 1.7874126561423771e-06, "loss": 66.9127, "step": 92800 }, { "epoch": 0.7677544773958721, "grad_norm": 323.0167236328125, "learning_rate": 1.786332060522904e-06, "loss": 83.0131, "step": 92810 }, { "epoch": 0.7678372006452413, "grad_norm": 819.0519409179688, "learning_rate": 1.7852517206002396e-06, "loss": 93.2481, "step": 92820 }, { "epoch": 0.7679199238946106, "grad_norm": 551.5955810546875, "learning_rate": 1.7841716364603423e-06, "loss": 84.1821, "step": 92830 }, { "epoch": 0.7680026471439798, "grad_norm": 791.5858154296875, "learning_rate": 1.783091808189149e-06, "loss": 107.4551, "step": 92840 }, { "epoch": 0.768085370393349, "grad_norm": 1241.2384033203125, "learning_rate": 1.7820122358725772e-06, "loss": 107.9027, "step": 92850 }, { "epoch": 0.7681680936427183, "grad_norm": 390.0361633300781, "learning_rate": 1.780932919596523e-06, "loss": 86.1765, "step": 92860 }, { "epoch": 0.7682508168920875, "grad_norm": 390.01922607421875, "learning_rate": 1.779853859446863e-06, "loss": 97.645, "step": 92870 }, { "epoch": 0.7683335401414567, "grad_norm": 877.824462890625, "learning_rate": 1.778775055509453e-06, "loss": 121.0466, "step": 92880 }, { "epoch": 0.768416263390826, "grad_norm": 679.44580078125, "learning_rate": 1.777696507870128e-06, "loss": 88.3246, "step": 92890 }, { "epoch": 0.7684989866401952, "grad_norm": 1261.235595703125, "learning_rate": 1.776618216614704e-06, "loss": 91.5801, "step": 92900 }, { "epoch": 0.7685817098895644, "grad_norm": 1093.8907470703125, "learning_rate": 1.7755401818289748e-06, "loss": 82.9775, "step": 92910 }, { "epoch": 0.7686644331389337, "grad_norm": 567.2011108398438, "learning_rate": 1.774462403598715e-06, "loss": 104.8191, "step": 92920 }, { "epoch": 0.7687471563883029, "grad_norm": 864.6884155273438, "learning_rate": 1.7733848820096789e-06, "loss": 72.2253, "step": 92930 }, { "epoch": 0.7688298796376721, "grad_norm": 781.83740234375, "learning_rate": 1.7723076171475995e-06, "loss": 97.3354, "step": 92940 }, { "epoch": 0.7689126028870414, "grad_norm": 622.6500854492188, "learning_rate": 1.7712306090981896e-06, "loss": 131.4019, "step": 92950 }, { "epoch": 0.7689953261364106, "grad_norm": 603.605712890625, "learning_rate": 1.7701538579471423e-06, "loss": 64.7121, "step": 92960 }, { "epoch": 0.7690780493857798, "grad_norm": 691.0977783203125, "learning_rate": 1.7690773637801295e-06, "loss": 89.4358, "step": 92970 }, { "epoch": 0.7691607726351491, "grad_norm": 857.383056640625, "learning_rate": 1.768001126682803e-06, "loss": 98.0366, "step": 92980 }, { "epoch": 0.7692434958845183, "grad_norm": 2012.945556640625, "learning_rate": 1.7669251467407938e-06, "loss": 76.4047, "step": 92990 }, { "epoch": 0.7693262191338875, "grad_norm": 356.16473388671875, "learning_rate": 1.7658494240397127e-06, "loss": 73.393, "step": 93000 }, { "epoch": 0.7694089423832569, "grad_norm": 853.9513549804688, "learning_rate": 1.7647739586651508e-06, "loss": 91.1541, "step": 93010 }, { "epoch": 0.769491665632626, "grad_norm": 880.5347900390625, "learning_rate": 1.7636987507026787e-06, "loss": 117.1849, "step": 93020 }, { "epoch": 0.7695743888819953, "grad_norm": 714.1282958984375, "learning_rate": 1.762623800237841e-06, "loss": 121.1683, "step": 93030 }, { "epoch": 0.7696571121313646, "grad_norm": 2766.218994140625, "learning_rate": 1.7615491073561714e-06, "loss": 75.8225, "step": 93040 }, { "epoch": 0.7697398353807338, "grad_norm": 723.0350952148438, "learning_rate": 1.760474672143177e-06, "loss": 83.1655, "step": 93050 }, { "epoch": 0.769822558630103, "grad_norm": 864.4639892578125, "learning_rate": 1.7594004946843458e-06, "loss": 71.9456, "step": 93060 }, { "epoch": 0.7699052818794723, "grad_norm": 1015.13330078125, "learning_rate": 1.7583265750651446e-06, "loss": 125.6941, "step": 93070 }, { "epoch": 0.7699880051288415, "grad_norm": 551.4693603515625, "learning_rate": 1.7572529133710204e-06, "loss": 68.586, "step": 93080 }, { "epoch": 0.7700707283782107, "grad_norm": 509.70941162109375, "learning_rate": 1.7561795096874002e-06, "loss": 66.0901, "step": 93090 }, { "epoch": 0.77015345162758, "grad_norm": 1378.9339599609375, "learning_rate": 1.755106364099689e-06, "loss": 88.016, "step": 93100 }, { "epoch": 0.7702361748769492, "grad_norm": 1190.9852294921875, "learning_rate": 1.7540334766932738e-06, "loss": 79.6336, "step": 93110 }, { "epoch": 0.7703188981263184, "grad_norm": 805.7669677734375, "learning_rate": 1.7529608475535165e-06, "loss": 89.6155, "step": 93120 }, { "epoch": 0.7704016213756877, "grad_norm": 520.67724609375, "learning_rate": 1.7518884767657612e-06, "loss": 62.7607, "step": 93130 }, { "epoch": 0.7704843446250569, "grad_norm": 1119.815673828125, "learning_rate": 1.7508163644153342e-06, "loss": 97.3919, "step": 93140 }, { "epoch": 0.7705670678744261, "grad_norm": 592.175048828125, "learning_rate": 1.7497445105875377e-06, "loss": 81.5456, "step": 93150 }, { "epoch": 0.7706497911237954, "grad_norm": 603.5911254882812, "learning_rate": 1.7486729153676536e-06, "loss": 70.6593, "step": 93160 }, { "epoch": 0.7707325143731646, "grad_norm": 762.583251953125, "learning_rate": 1.7476015788409439e-06, "loss": 93.7638, "step": 93170 }, { "epoch": 0.7708152376225338, "grad_norm": 1937.641845703125, "learning_rate": 1.7465305010926503e-06, "loss": 100.976, "step": 93180 }, { "epoch": 0.7708979608719031, "grad_norm": 1821.3037109375, "learning_rate": 1.745459682207995e-06, "loss": 98.9063, "step": 93190 }, { "epoch": 0.7709806841212723, "grad_norm": 418.82037353515625, "learning_rate": 1.7443891222721749e-06, "loss": 96.1245, "step": 93200 }, { "epoch": 0.7710634073706415, "grad_norm": 596.7721557617188, "learning_rate": 1.7433188213703712e-06, "loss": 83.4011, "step": 93210 }, { "epoch": 0.7711461306200108, "grad_norm": 824.0478515625, "learning_rate": 1.7422487795877424e-06, "loss": 104.4605, "step": 93220 }, { "epoch": 0.77122885386938, "grad_norm": 780.7681884765625, "learning_rate": 1.7411789970094257e-06, "loss": 109.6282, "step": 93230 }, { "epoch": 0.7713115771187492, "grad_norm": 721.4907836914062, "learning_rate": 1.7401094737205415e-06, "loss": 127.5015, "step": 93240 }, { "epoch": 0.7713943003681185, "grad_norm": 0.0, "learning_rate": 1.739040209806186e-06, "loss": 103.458, "step": 93250 }, { "epoch": 0.7714770236174877, "grad_norm": 1141.449462890625, "learning_rate": 1.7379712053514352e-06, "loss": 91.0078, "step": 93260 }, { "epoch": 0.7715597468668569, "grad_norm": 780.21337890625, "learning_rate": 1.736902460441345e-06, "loss": 83.8483, "step": 93270 }, { "epoch": 0.7716424701162262, "grad_norm": 1003.1300048828125, "learning_rate": 1.735833975160952e-06, "loss": 91.4468, "step": 93280 }, { "epoch": 0.7717251933655954, "grad_norm": 672.5834350585938, "learning_rate": 1.7347657495952675e-06, "loss": 95.7494, "step": 93290 }, { "epoch": 0.7718079166149646, "grad_norm": 1039.33544921875, "learning_rate": 1.7336977838292867e-06, "loss": 83.7675, "step": 93300 }, { "epoch": 0.7718906398643338, "grad_norm": 1989.511474609375, "learning_rate": 1.7326300779479826e-06, "loss": 78.2224, "step": 93310 }, { "epoch": 0.7719733631137031, "grad_norm": 427.48193359375, "learning_rate": 1.731562632036307e-06, "loss": 89.7143, "step": 93320 }, { "epoch": 0.7720560863630723, "grad_norm": 573.6722412109375, "learning_rate": 1.730495446179194e-06, "loss": 85.7981, "step": 93330 }, { "epoch": 0.7721388096124415, "grad_norm": 937.4099731445312, "learning_rate": 1.7294285204615536e-06, "loss": 76.9562, "step": 93340 }, { "epoch": 0.7722215328618108, "grad_norm": 931.5322265625, "learning_rate": 1.7283618549682757e-06, "loss": 73.166, "step": 93350 }, { "epoch": 0.77230425611118, "grad_norm": 915.3590087890625, "learning_rate": 1.727295449784232e-06, "loss": 78.7844, "step": 93360 }, { "epoch": 0.7723869793605492, "grad_norm": 1507.6265869140625, "learning_rate": 1.726229304994268e-06, "loss": 77.3544, "step": 93370 }, { "epoch": 0.7724697026099185, "grad_norm": 809.1603393554688, "learning_rate": 1.7251634206832135e-06, "loss": 81.6114, "step": 93380 }, { "epoch": 0.7725524258592877, "grad_norm": 410.7778015136719, "learning_rate": 1.7240977969358757e-06, "loss": 87.7018, "step": 93390 }, { "epoch": 0.7726351491086569, "grad_norm": 805.246826171875, "learning_rate": 1.7230324338370425e-06, "loss": 80.5176, "step": 93400 }, { "epoch": 0.7727178723580262, "grad_norm": 687.7548828125, "learning_rate": 1.721967331471479e-06, "loss": 79.3853, "step": 93410 }, { "epoch": 0.7728005956073954, "grad_norm": 941.4087524414062, "learning_rate": 1.7209024899239297e-06, "loss": 81.4306, "step": 93420 }, { "epoch": 0.7728833188567646, "grad_norm": 947.3975830078125, "learning_rate": 1.7198379092791213e-06, "loss": 79.1074, "step": 93430 }, { "epoch": 0.772966042106134, "grad_norm": 1059.6492919921875, "learning_rate": 1.7187735896217567e-06, "loss": 75.3719, "step": 93440 }, { "epoch": 0.7730487653555032, "grad_norm": 853.2528076171875, "learning_rate": 1.7177095310365205e-06, "loss": 69.0534, "step": 93450 }, { "epoch": 0.7731314886048724, "grad_norm": 940.2296142578125, "learning_rate": 1.7166457336080716e-06, "loss": 70.2273, "step": 93460 }, { "epoch": 0.7732142118542417, "grad_norm": 934.7996215820312, "learning_rate": 1.715582197421053e-06, "loss": 77.2213, "step": 93470 }, { "epoch": 0.7732969351036109, "grad_norm": 550.6903076171875, "learning_rate": 1.7145189225600856e-06, "loss": 76.0427, "step": 93480 }, { "epoch": 0.7733796583529801, "grad_norm": 1751.7384033203125, "learning_rate": 1.7134559091097691e-06, "loss": 95.2524, "step": 93490 }, { "epoch": 0.7734623816023494, "grad_norm": 1242.5704345703125, "learning_rate": 1.7123931571546826e-06, "loss": 85.0259, "step": 93500 }, { "epoch": 0.7735451048517186, "grad_norm": 1116.49365234375, "learning_rate": 1.711330666779385e-06, "loss": 93.0945, "step": 93510 }, { "epoch": 0.7736278281010878, "grad_norm": 1309.2081298828125, "learning_rate": 1.7102684380684109e-06, "loss": 102.1733, "step": 93520 }, { "epoch": 0.7737105513504571, "grad_norm": 610.8143920898438, "learning_rate": 1.7092064711062816e-06, "loss": 83.7248, "step": 93530 }, { "epoch": 0.7737932745998263, "grad_norm": 656.4087524414062, "learning_rate": 1.708144765977492e-06, "loss": 83.5821, "step": 93540 }, { "epoch": 0.7738759978491955, "grad_norm": 2868.773193359375, "learning_rate": 1.7070833227665146e-06, "loss": 86.4508, "step": 93550 }, { "epoch": 0.7739587210985648, "grad_norm": 547.4402465820312, "learning_rate": 1.7060221415578042e-06, "loss": 96.0463, "step": 93560 }, { "epoch": 0.774041444347934, "grad_norm": 983.4544067382812, "learning_rate": 1.7049612224357954e-06, "loss": 72.9841, "step": 93570 }, { "epoch": 0.7741241675973032, "grad_norm": 447.38299560546875, "learning_rate": 1.703900565484899e-06, "loss": 75.4073, "step": 93580 }, { "epoch": 0.7742068908466725, "grad_norm": 797.6534423828125, "learning_rate": 1.7028401707895082e-06, "loss": 72.9507, "step": 93590 }, { "epoch": 0.7742896140960417, "grad_norm": 536.6268310546875, "learning_rate": 1.7017800384339928e-06, "loss": 96.6598, "step": 93600 }, { "epoch": 0.7743723373454109, "grad_norm": 601.033935546875, "learning_rate": 1.700720168502703e-06, "loss": 94.863, "step": 93610 }, { "epoch": 0.7744550605947802, "grad_norm": 1244.7003173828125, "learning_rate": 1.6996605610799682e-06, "loss": 89.0981, "step": 93620 }, { "epoch": 0.7745377838441494, "grad_norm": 423.31646728515625, "learning_rate": 1.6986012162500953e-06, "loss": 90.755, "step": 93630 }, { "epoch": 0.7746205070935186, "grad_norm": 583.6690673828125, "learning_rate": 1.697542134097373e-06, "loss": 102.8274, "step": 93640 }, { "epoch": 0.7747032303428879, "grad_norm": 1013.60693359375, "learning_rate": 1.6964833147060661e-06, "loss": 76.9443, "step": 93650 }, { "epoch": 0.7747859535922571, "grad_norm": 1234.383056640625, "learning_rate": 1.6954247581604216e-06, "loss": 84.067, "step": 93660 }, { "epoch": 0.7748686768416263, "grad_norm": 1029.810791015625, "learning_rate": 1.6943664645446622e-06, "loss": 89.6185, "step": 93670 }, { "epoch": 0.7749514000909956, "grad_norm": 1082.3262939453125, "learning_rate": 1.6933084339429935e-06, "loss": 88.7879, "step": 93680 }, { "epoch": 0.7750341233403648, "grad_norm": 1071.9556884765625, "learning_rate": 1.692250666439596e-06, "loss": 78.9346, "step": 93690 }, { "epoch": 0.775116846589734, "grad_norm": 677.65771484375, "learning_rate": 1.6911931621186329e-06, "loss": 82.8995, "step": 93700 }, { "epoch": 0.7751995698391033, "grad_norm": 867.5599365234375, "learning_rate": 1.6901359210642444e-06, "loss": 78.1593, "step": 93710 }, { "epoch": 0.7752822930884725, "grad_norm": 942.0477905273438, "learning_rate": 1.6890789433605508e-06, "loss": 94.003, "step": 93720 }, { "epoch": 0.7753650163378417, "grad_norm": 519.2806396484375, "learning_rate": 1.6880222290916503e-06, "loss": 60.9571, "step": 93730 }, { "epoch": 0.775447739587211, "grad_norm": 774.544921875, "learning_rate": 1.686965778341621e-06, "loss": 84.0111, "step": 93740 }, { "epoch": 0.7755304628365802, "grad_norm": 667.4334716796875, "learning_rate": 1.68590959119452e-06, "loss": 76.5631, "step": 93750 }, { "epoch": 0.7756131860859494, "grad_norm": 467.7774353027344, "learning_rate": 1.6848536677343836e-06, "loss": 60.4382, "step": 93760 }, { "epoch": 0.7756959093353187, "grad_norm": 455.2456970214844, "learning_rate": 1.683798008045226e-06, "loss": 78.7938, "step": 93770 }, { "epoch": 0.7757786325846879, "grad_norm": 1058.66943359375, "learning_rate": 1.6827426122110412e-06, "loss": 78.3543, "step": 93780 }, { "epoch": 0.7758613558340571, "grad_norm": 635.2317504882812, "learning_rate": 1.6816874803158034e-06, "loss": 79.6428, "step": 93790 }, { "epoch": 0.7759440790834264, "grad_norm": 562.6470947265625, "learning_rate": 1.6806326124434634e-06, "loss": 59.4104, "step": 93800 }, { "epoch": 0.7760268023327956, "grad_norm": 832.704833984375, "learning_rate": 1.679578008677953e-06, "loss": 97.8018, "step": 93810 }, { "epoch": 0.7761095255821648, "grad_norm": 826.8822631835938, "learning_rate": 1.6785236691031808e-06, "loss": 66.9371, "step": 93820 }, { "epoch": 0.7761922488315341, "grad_norm": 717.45068359375, "learning_rate": 1.6774695938030378e-06, "loss": 60.8474, "step": 93830 }, { "epoch": 0.7762749720809033, "grad_norm": 893.1639404296875, "learning_rate": 1.6764157828613902e-06, "loss": 94.3922, "step": 93840 }, { "epoch": 0.7763576953302725, "grad_norm": 422.0210876464844, "learning_rate": 1.675362236362086e-06, "loss": 73.2281, "step": 93850 }, { "epoch": 0.7764404185796419, "grad_norm": 989.2019653320312, "learning_rate": 1.6743089543889502e-06, "loss": 103.5385, "step": 93860 }, { "epoch": 0.776523141829011, "grad_norm": 823.348388671875, "learning_rate": 1.6732559370257884e-06, "loss": 91.4836, "step": 93870 }, { "epoch": 0.7766058650783803, "grad_norm": 1299.27392578125, "learning_rate": 1.6722031843563836e-06, "loss": 93.1182, "step": 93880 }, { "epoch": 0.7766885883277496, "grad_norm": 577.8670654296875, "learning_rate": 1.6711506964644992e-06, "loss": 93.8182, "step": 93890 }, { "epoch": 0.7767713115771188, "grad_norm": 502.0442810058594, "learning_rate": 1.6700984734338765e-06, "loss": 68.5818, "step": 93900 }, { "epoch": 0.776854034826488, "grad_norm": 1039.2012939453125, "learning_rate": 1.669046515348236e-06, "loss": 85.6031, "step": 93910 }, { "epoch": 0.7769367580758573, "grad_norm": 390.88385009765625, "learning_rate": 1.6679948222912773e-06, "loss": 79.0019, "step": 93920 }, { "epoch": 0.7770194813252265, "grad_norm": 908.3690795898438, "learning_rate": 1.6669433943466789e-06, "loss": 125.9054, "step": 93930 }, { "epoch": 0.7771022045745957, "grad_norm": 942.0368041992188, "learning_rate": 1.6658922315980975e-06, "loss": 101.4422, "step": 93940 }, { "epoch": 0.777184927823965, "grad_norm": 817.743896484375, "learning_rate": 1.6648413341291703e-06, "loss": 79.908, "step": 93950 }, { "epoch": 0.7772676510733342, "grad_norm": 882.8682250976562, "learning_rate": 1.6637907020235117e-06, "loss": 76.305, "step": 93960 }, { "epoch": 0.7773503743227034, "grad_norm": 1031.4886474609375, "learning_rate": 1.662740335364717e-06, "loss": 126.5732, "step": 93970 }, { "epoch": 0.7774330975720727, "grad_norm": 771.1046142578125, "learning_rate": 1.661690234236355e-06, "loss": 95.6885, "step": 93980 }, { "epoch": 0.7775158208214419, "grad_norm": 716.6741943359375, "learning_rate": 1.6606403987219815e-06, "loss": 78.7987, "step": 93990 }, { "epoch": 0.7775985440708111, "grad_norm": 630.847900390625, "learning_rate": 1.6595908289051266e-06, "loss": 90.0902, "step": 94000 }, { "epoch": 0.7776812673201804, "grad_norm": 845.2430419921875, "learning_rate": 1.6585415248692988e-06, "loss": 88.3763, "step": 94010 }, { "epoch": 0.7777639905695496, "grad_norm": 1075.101318359375, "learning_rate": 1.6574924866979863e-06, "loss": 54.1172, "step": 94020 }, { "epoch": 0.7778467138189188, "grad_norm": 1161.794921875, "learning_rate": 1.6564437144746564e-06, "loss": 78.1358, "step": 94030 }, { "epoch": 0.777929437068288, "grad_norm": 867.4984130859375, "learning_rate": 1.6553952082827562e-06, "loss": 96.2229, "step": 94040 }, { "epoch": 0.7780121603176573, "grad_norm": 909.298583984375, "learning_rate": 1.6543469682057105e-06, "loss": 99.5179, "step": 94050 }, { "epoch": 0.7780948835670265, "grad_norm": 684.060791015625, "learning_rate": 1.6532989943269207e-06, "loss": 81.0873, "step": 94060 }, { "epoch": 0.7781776068163957, "grad_norm": 1213.9482421875, "learning_rate": 1.6522512867297707e-06, "loss": 106.0108, "step": 94070 }, { "epoch": 0.778260330065765, "grad_norm": 980.2380981445312, "learning_rate": 1.6512038454976198e-06, "loss": 85.677, "step": 94080 }, { "epoch": 0.7783430533151342, "grad_norm": 450.2137451171875, "learning_rate": 1.6501566707138116e-06, "loss": 65.2738, "step": 94090 }, { "epoch": 0.7784257765645034, "grad_norm": 525.07177734375, "learning_rate": 1.6491097624616637e-06, "loss": 75.8729, "step": 94100 }, { "epoch": 0.7785084998138727, "grad_norm": 908.7725219726562, "learning_rate": 1.6480631208244735e-06, "loss": 125.1627, "step": 94110 }, { "epoch": 0.7785912230632419, "grad_norm": 468.6520690917969, "learning_rate": 1.6470167458855174e-06, "loss": 110.0471, "step": 94120 }, { "epoch": 0.7786739463126111, "grad_norm": 1324.9775390625, "learning_rate": 1.645970637728051e-06, "loss": 73.7453, "step": 94130 }, { "epoch": 0.7787566695619804, "grad_norm": 1378.573486328125, "learning_rate": 1.6449247964353094e-06, "loss": 115.2216, "step": 94140 }, { "epoch": 0.7788393928113496, "grad_norm": 1270.413330078125, "learning_rate": 1.643879222090502e-06, "loss": 75.4719, "step": 94150 }, { "epoch": 0.7789221160607188, "grad_norm": 823.2882690429688, "learning_rate": 1.642833914776823e-06, "loss": 99.8761, "step": 94160 }, { "epoch": 0.7790048393100881, "grad_norm": 1631.2496337890625, "learning_rate": 1.6417888745774418e-06, "loss": 93.0183, "step": 94170 }, { "epoch": 0.7790875625594573, "grad_norm": 618.7994384765625, "learning_rate": 1.640744101575506e-06, "loss": 87.0626, "step": 94180 }, { "epoch": 0.7791702858088265, "grad_norm": 874.3128051757812, "learning_rate": 1.6396995958541468e-06, "loss": 78.908, "step": 94190 }, { "epoch": 0.7792530090581958, "grad_norm": 1032.2872314453125, "learning_rate": 1.6386553574964691e-06, "loss": 87.541, "step": 94200 }, { "epoch": 0.779335732307565, "grad_norm": 711.1810302734375, "learning_rate": 1.6376113865855585e-06, "loss": 65.8986, "step": 94210 }, { "epoch": 0.7794184555569342, "grad_norm": 1084.5020751953125, "learning_rate": 1.6365676832044796e-06, "loss": 92.7198, "step": 94220 }, { "epoch": 0.7795011788063035, "grad_norm": 877.0111694335938, "learning_rate": 1.6355242474362732e-06, "loss": 100.024, "step": 94230 }, { "epoch": 0.7795839020556727, "grad_norm": 1269.5721435546875, "learning_rate": 1.634481079363961e-06, "loss": 84.5142, "step": 94240 }, { "epoch": 0.7796666253050419, "grad_norm": 683.024169921875, "learning_rate": 1.6334381790705439e-06, "loss": 81.7544, "step": 94250 }, { "epoch": 0.7797493485544112, "grad_norm": 611.2015991210938, "learning_rate": 1.6323955466390001e-06, "loss": 94.3467, "step": 94260 }, { "epoch": 0.7798320718037804, "grad_norm": 724.1978149414062, "learning_rate": 1.6313531821522876e-06, "loss": 93.952, "step": 94270 }, { "epoch": 0.7799147950531496, "grad_norm": 540.2195434570312, "learning_rate": 1.6303110856933413e-06, "loss": 78.9152, "step": 94280 }, { "epoch": 0.779997518302519, "grad_norm": 1860.32666015625, "learning_rate": 1.629269257345078e-06, "loss": 131.9263, "step": 94290 }, { "epoch": 0.7800802415518882, "grad_norm": 987.336181640625, "learning_rate": 1.628227697190391e-06, "loss": 98.8791, "step": 94300 }, { "epoch": 0.7801629648012574, "grad_norm": 608.0608520507812, "learning_rate": 1.6271864053121528e-06, "loss": 141.6185, "step": 94310 }, { "epoch": 0.7802456880506267, "grad_norm": 752.9619140625, "learning_rate": 1.6261453817932122e-06, "loss": 61.8438, "step": 94320 }, { "epoch": 0.7803284112999959, "grad_norm": 849.4782104492188, "learning_rate": 1.6251046267163988e-06, "loss": 94.6276, "step": 94330 }, { "epoch": 0.7804111345493651, "grad_norm": 788.5552368164062, "learning_rate": 1.6240641401645224e-06, "loss": 96.3306, "step": 94340 }, { "epoch": 0.7804938577987344, "grad_norm": 507.6255187988281, "learning_rate": 1.6230239222203687e-06, "loss": 103.3219, "step": 94350 }, { "epoch": 0.7805765810481036, "grad_norm": 920.311767578125, "learning_rate": 1.621983972966703e-06, "loss": 114.5354, "step": 94360 }, { "epoch": 0.7806593042974728, "grad_norm": 1070.4920654296875, "learning_rate": 1.6209442924862684e-06, "loss": 107.5379, "step": 94370 }, { "epoch": 0.7807420275468421, "grad_norm": 1888.6494140625, "learning_rate": 1.6199048808617896e-06, "loss": 89.8358, "step": 94380 }, { "epoch": 0.7808247507962113, "grad_norm": 1030.0611572265625, "learning_rate": 1.6188657381759676e-06, "loss": 86.013, "step": 94390 }, { "epoch": 0.7809074740455805, "grad_norm": 437.46112060546875, "learning_rate": 1.6178268645114826e-06, "loss": 73.3406, "step": 94400 }, { "epoch": 0.7809901972949498, "grad_norm": 701.3662719726562, "learning_rate": 1.6167882599509904e-06, "loss": 88.5935, "step": 94410 }, { "epoch": 0.781072920544319, "grad_norm": 639.0773315429688, "learning_rate": 1.6157499245771296e-06, "loss": 73.676, "step": 94420 }, { "epoch": 0.7811556437936882, "grad_norm": 894.396484375, "learning_rate": 1.6147118584725163e-06, "loss": 90.9503, "step": 94430 }, { "epoch": 0.7812383670430575, "grad_norm": 1062.1143798828125, "learning_rate": 1.6136740617197433e-06, "loss": 90.4783, "step": 94440 }, { "epoch": 0.7813210902924267, "grad_norm": 959.5033569335938, "learning_rate": 1.612636534401384e-06, "loss": 61.4715, "step": 94450 }, { "epoch": 0.7814038135417959, "grad_norm": 1169.7418212890625, "learning_rate": 1.61159927659999e-06, "loss": 77.9506, "step": 94460 }, { "epoch": 0.7814865367911652, "grad_norm": 1380.6806640625, "learning_rate": 1.6105622883980893e-06, "loss": 80.2579, "step": 94470 }, { "epoch": 0.7815692600405344, "grad_norm": 1116.7760009765625, "learning_rate": 1.6095255698781954e-06, "loss": 120.8729, "step": 94480 }, { "epoch": 0.7816519832899036, "grad_norm": 709.1279907226562, "learning_rate": 1.6084891211227899e-06, "loss": 89.6381, "step": 94490 }, { "epoch": 0.7817347065392729, "grad_norm": 626.1630859375, "learning_rate": 1.6074529422143398e-06, "loss": 82.1522, "step": 94500 }, { "epoch": 0.7818174297886421, "grad_norm": 1719.0965576171875, "learning_rate": 1.6064170332352897e-06, "loss": 68.7441, "step": 94510 }, { "epoch": 0.7819001530380113, "grad_norm": 716.7944946289062, "learning_rate": 1.6053813942680618e-06, "loss": 82.9248, "step": 94520 }, { "epoch": 0.7819828762873806, "grad_norm": 828.8280029296875, "learning_rate": 1.604346025395057e-06, "loss": 79.9472, "step": 94530 }, { "epoch": 0.7820655995367498, "grad_norm": 675.82568359375, "learning_rate": 1.6033109266986552e-06, "loss": 118.3841, "step": 94540 }, { "epoch": 0.782148322786119, "grad_norm": 966.230224609375, "learning_rate": 1.602276098261214e-06, "loss": 103.3147, "step": 94550 }, { "epoch": 0.7822310460354883, "grad_norm": 696.0429077148438, "learning_rate": 1.6012415401650706e-06, "loss": 98.5404, "step": 94560 }, { "epoch": 0.7823137692848575, "grad_norm": 739.5772705078125, "learning_rate": 1.6002072524925395e-06, "loss": 78.0328, "step": 94570 }, { "epoch": 0.7823964925342267, "grad_norm": 537.0844116210938, "learning_rate": 1.5991732353259142e-06, "loss": 76.4263, "step": 94580 }, { "epoch": 0.782479215783596, "grad_norm": 462.9024658203125, "learning_rate": 1.598139488747467e-06, "loss": 95.2623, "step": 94590 }, { "epoch": 0.7825619390329652, "grad_norm": 698.7216186523438, "learning_rate": 1.5971060128394483e-06, "loss": 85.2989, "step": 94600 }, { "epoch": 0.7826446622823344, "grad_norm": 887.9057006835938, "learning_rate": 1.596072807684087e-06, "loss": 112.7182, "step": 94610 }, { "epoch": 0.7827273855317037, "grad_norm": 981.1459350585938, "learning_rate": 1.5950398733635903e-06, "loss": 94.8544, "step": 94620 }, { "epoch": 0.7828101087810729, "grad_norm": 1017.7367553710938, "learning_rate": 1.5940072099601446e-06, "loss": 69.3493, "step": 94630 }, { "epoch": 0.7828928320304421, "grad_norm": 593.1333618164062, "learning_rate": 1.5929748175559135e-06, "loss": 60.2959, "step": 94640 }, { "epoch": 0.7829755552798114, "grad_norm": 808.7359008789062, "learning_rate": 1.5919426962330398e-06, "loss": 85.7781, "step": 94650 }, { "epoch": 0.7830582785291806, "grad_norm": 1296.2574462890625, "learning_rate": 1.5909108460736455e-06, "loss": 82.9248, "step": 94660 }, { "epoch": 0.7831410017785498, "grad_norm": 622.1264038085938, "learning_rate": 1.589879267159829e-06, "loss": 74.3232, "step": 94670 }, { "epoch": 0.7832237250279191, "grad_norm": 381.82867431640625, "learning_rate": 1.5888479595736695e-06, "loss": 91.2148, "step": 94680 }, { "epoch": 0.7833064482772883, "grad_norm": 378.500244140625, "learning_rate": 1.5878169233972218e-06, "loss": 89.309, "step": 94690 }, { "epoch": 0.7833891715266575, "grad_norm": 641.9385375976562, "learning_rate": 1.5867861587125228e-06, "loss": 81.5497, "step": 94700 }, { "epoch": 0.7834718947760269, "grad_norm": 2622.712890625, "learning_rate": 1.5857556656015837e-06, "loss": 101.5679, "step": 94710 }, { "epoch": 0.783554618025396, "grad_norm": 1032.7587890625, "learning_rate": 1.5847254441463978e-06, "loss": 78.4932, "step": 94720 }, { "epoch": 0.7836373412747653, "grad_norm": 776.2950439453125, "learning_rate": 1.583695494428934e-06, "loss": 71.536, "step": 94730 }, { "epoch": 0.7837200645241346, "grad_norm": 942.0223999023438, "learning_rate": 1.5826658165311409e-06, "loss": 88.8115, "step": 94740 }, { "epoch": 0.7838027877735038, "grad_norm": 739.5013427734375, "learning_rate": 1.5816364105349451e-06, "loss": 117.7168, "step": 94750 }, { "epoch": 0.783885511022873, "grad_norm": 477.8927001953125, "learning_rate": 1.5806072765222524e-06, "loss": 96.1067, "step": 94760 }, { "epoch": 0.7839682342722422, "grad_norm": 751.9795532226562, "learning_rate": 1.5795784145749453e-06, "loss": 78.0177, "step": 94770 }, { "epoch": 0.7840509575216115, "grad_norm": 756.887451171875, "learning_rate": 1.5785498247748864e-06, "loss": 82.0907, "step": 94780 }, { "epoch": 0.7841336807709807, "grad_norm": 814.6382446289062, "learning_rate": 1.5775215072039157e-06, "loss": 118.0077, "step": 94790 }, { "epoch": 0.7842164040203499, "grad_norm": 492.3858337402344, "learning_rate": 1.5764934619438515e-06, "loss": 83.478, "step": 94800 }, { "epoch": 0.7842991272697192, "grad_norm": 1426.8245849609375, "learning_rate": 1.5754656890764912e-06, "loss": 82.622, "step": 94810 }, { "epoch": 0.7843818505190884, "grad_norm": 838.7455444335938, "learning_rate": 1.5744381886836091e-06, "loss": 99.2792, "step": 94820 }, { "epoch": 0.7844645737684576, "grad_norm": 848.1531982421875, "learning_rate": 1.5734109608469612e-06, "loss": 92.1673, "step": 94830 }, { "epoch": 0.7845472970178269, "grad_norm": 401.6071472167969, "learning_rate": 1.5723840056482731e-06, "loss": 79.848, "step": 94840 }, { "epoch": 0.7846300202671961, "grad_norm": 879.9978637695312, "learning_rate": 1.5713573231692613e-06, "loss": 59.712, "step": 94850 }, { "epoch": 0.7847127435165653, "grad_norm": 913.5989990234375, "learning_rate": 1.5703309134916116e-06, "loss": 74.9016, "step": 94860 }, { "epoch": 0.7847954667659346, "grad_norm": 1169.180908203125, "learning_rate": 1.5693047766969916e-06, "loss": 88.2039, "step": 94870 }, { "epoch": 0.7848781900153038, "grad_norm": 809.9779663085938, "learning_rate": 1.568278912867045e-06, "loss": 110.5675, "step": 94880 }, { "epoch": 0.784960913264673, "grad_norm": 501.2241516113281, "learning_rate": 1.5672533220833962e-06, "loss": 67.3911, "step": 94890 }, { "epoch": 0.7850436365140423, "grad_norm": 1112.9195556640625, "learning_rate": 1.5662280044276467e-06, "loss": 94.5414, "step": 94900 }, { "epoch": 0.7851263597634115, "grad_norm": 1013.2802734375, "learning_rate": 1.5652029599813773e-06, "loss": 106.9486, "step": 94910 }, { "epoch": 0.7852090830127807, "grad_norm": 693.358154296875, "learning_rate": 1.564178188826143e-06, "loss": 78.1435, "step": 94920 }, { "epoch": 0.78529180626215, "grad_norm": 1105.666259765625, "learning_rate": 1.5631536910434807e-06, "loss": 99.9668, "step": 94930 }, { "epoch": 0.7853745295115192, "grad_norm": 1162.7235107421875, "learning_rate": 1.5621294667149079e-06, "loss": 87.6308, "step": 94940 }, { "epoch": 0.7854572527608884, "grad_norm": 539.553955078125, "learning_rate": 1.561105515921915e-06, "loss": 91.6051, "step": 94950 }, { "epoch": 0.7855399760102577, "grad_norm": 722.3108520507812, "learning_rate": 1.5600818387459748e-06, "loss": 64.7316, "step": 94960 }, { "epoch": 0.7856226992596269, "grad_norm": 381.7198486328125, "learning_rate": 1.559058435268535e-06, "loss": 66.9805, "step": 94970 }, { "epoch": 0.7857054225089961, "grad_norm": 635.7542724609375, "learning_rate": 1.558035305571024e-06, "loss": 114.3884, "step": 94980 }, { "epoch": 0.7857881457583654, "grad_norm": 1400.4791259765625, "learning_rate": 1.5570124497348466e-06, "loss": 110.485, "step": 94990 }, { "epoch": 0.7858708690077346, "grad_norm": 678.18896484375, "learning_rate": 1.5559898678413898e-06, "loss": 96.3298, "step": 95000 }, { "epoch": 0.7859535922571038, "grad_norm": 1071.6373291015625, "learning_rate": 1.554967559972011e-06, "loss": 98.7113, "step": 95010 }, { "epoch": 0.7860363155064731, "grad_norm": 1374.9266357421875, "learning_rate": 1.5539455262080534e-06, "loss": 118.118, "step": 95020 }, { "epoch": 0.7861190387558423, "grad_norm": 759.0629272460938, "learning_rate": 1.5529237666308333e-06, "loss": 93.9589, "step": 95030 }, { "epoch": 0.7862017620052115, "grad_norm": 854.2042236328125, "learning_rate": 1.551902281321651e-06, "loss": 77.0455, "step": 95040 }, { "epoch": 0.7862844852545808, "grad_norm": 812.8806762695312, "learning_rate": 1.5508810703617794e-06, "loss": 79.7987, "step": 95050 }, { "epoch": 0.78636720850395, "grad_norm": 709.544677734375, "learning_rate": 1.5498601338324715e-06, "loss": 79.4533, "step": 95060 }, { "epoch": 0.7864499317533192, "grad_norm": 869.2487182617188, "learning_rate": 1.5488394718149586e-06, "loss": 70.6014, "step": 95070 }, { "epoch": 0.7865326550026885, "grad_norm": 612.1897583007812, "learning_rate": 1.5478190843904523e-06, "loss": 80.7072, "step": 95080 }, { "epoch": 0.7866153782520577, "grad_norm": 713.2626342773438, "learning_rate": 1.546798971640136e-06, "loss": 62.9009, "step": 95090 }, { "epoch": 0.7866981015014269, "grad_norm": 761.9999389648438, "learning_rate": 1.5457791336451777e-06, "loss": 74.8367, "step": 95100 }, { "epoch": 0.7867808247507962, "grad_norm": 1282.5313720703125, "learning_rate": 1.5447595704867213e-06, "loss": 114.0064, "step": 95110 }, { "epoch": 0.7868635480001654, "grad_norm": 748.5642700195312, "learning_rate": 1.543740282245888e-06, "loss": 96.991, "step": 95120 }, { "epoch": 0.7869462712495346, "grad_norm": 755.327392578125, "learning_rate": 1.5427212690037774e-06, "loss": 69.9441, "step": 95130 }, { "epoch": 0.787028994498904, "grad_norm": 669.56201171875, "learning_rate": 1.5417025308414695e-06, "loss": 101.2025, "step": 95140 }, { "epoch": 0.7871117177482732, "grad_norm": 710.7820434570312, "learning_rate": 1.5406840678400204e-06, "loss": 98.5515, "step": 95150 }, { "epoch": 0.7871944409976424, "grad_norm": 1083.8662109375, "learning_rate": 1.5396658800804632e-06, "loss": 102.2657, "step": 95160 }, { "epoch": 0.7872771642470117, "grad_norm": 1122.4599609375, "learning_rate": 1.5386479676438132e-06, "loss": 85.0763, "step": 95170 }, { "epoch": 0.7873598874963809, "grad_norm": 1315.6075439453125, "learning_rate": 1.5376303306110574e-06, "loss": 93.9366, "step": 95180 }, { "epoch": 0.7874426107457501, "grad_norm": 796.9031372070312, "learning_rate": 1.536612969063166e-06, "loss": 73.0806, "step": 95190 }, { "epoch": 0.7875253339951194, "grad_norm": 571.8799438476562, "learning_rate": 1.5355958830810858e-06, "loss": 67.0068, "step": 95200 }, { "epoch": 0.7876080572444886, "grad_norm": 558.7503051757812, "learning_rate": 1.5345790727457416e-06, "loss": 92.4161, "step": 95210 }, { "epoch": 0.7876907804938578, "grad_norm": 538.6721801757812, "learning_rate": 1.5335625381380364e-06, "loss": 75.6145, "step": 95220 }, { "epoch": 0.7877735037432271, "grad_norm": 1402.743408203125, "learning_rate": 1.5325462793388502e-06, "loss": 127.778, "step": 95230 }, { "epoch": 0.7878562269925963, "grad_norm": 934.5457763671875, "learning_rate": 1.531530296429044e-06, "loss": 93.78, "step": 95240 }, { "epoch": 0.7879389502419655, "grad_norm": 658.7655029296875, "learning_rate": 1.5305145894894547e-06, "loss": 99.2224, "step": 95250 }, { "epoch": 0.7880216734913348, "grad_norm": 1240.9752197265625, "learning_rate": 1.5294991586008977e-06, "loss": 99.2145, "step": 95260 }, { "epoch": 0.788104396740704, "grad_norm": 847.5444946289062, "learning_rate": 1.528484003844164e-06, "loss": 105.1305, "step": 95270 }, { "epoch": 0.7881871199900732, "grad_norm": 1418.359130859375, "learning_rate": 1.5274691253000257e-06, "loss": 150.746, "step": 95280 }, { "epoch": 0.7882698432394425, "grad_norm": 576.1387939453125, "learning_rate": 1.5264545230492323e-06, "loss": 72.2739, "step": 95290 }, { "epoch": 0.7883525664888117, "grad_norm": 1523.2347412109375, "learning_rate": 1.5254401971725114e-06, "loss": 76.5713, "step": 95300 }, { "epoch": 0.7884352897381809, "grad_norm": 850.4096069335938, "learning_rate": 1.5244261477505678e-06, "loss": 56.1657, "step": 95310 }, { "epoch": 0.7885180129875502, "grad_norm": 620.0430908203125, "learning_rate": 1.5234123748640834e-06, "loss": 91.6503, "step": 95320 }, { "epoch": 0.7886007362369194, "grad_norm": 732.49267578125, "learning_rate": 1.5223988785937222e-06, "loss": 91.2292, "step": 95330 }, { "epoch": 0.7886834594862886, "grad_norm": 1129.179443359375, "learning_rate": 1.5213856590201241e-06, "loss": 67.9968, "step": 95340 }, { "epoch": 0.7887661827356579, "grad_norm": 744.8907470703125, "learning_rate": 1.520372716223903e-06, "loss": 109.5926, "step": 95350 }, { "epoch": 0.7888489059850271, "grad_norm": 1017.0743408203125, "learning_rate": 1.5193600502856548e-06, "loss": 108.5264, "step": 95360 }, { "epoch": 0.7889316292343963, "grad_norm": 150.6322021484375, "learning_rate": 1.5183476612859538e-06, "loss": 100.5434, "step": 95370 }, { "epoch": 0.7890143524837656, "grad_norm": 1154.6719970703125, "learning_rate": 1.5173355493053509e-06, "loss": 91.2678, "step": 95380 }, { "epoch": 0.7890970757331348, "grad_norm": 906.0249633789062, "learning_rate": 1.5163237144243754e-06, "loss": 119.1058, "step": 95390 }, { "epoch": 0.789179798982504, "grad_norm": 976.3828735351562, "learning_rate": 1.5153121567235334e-06, "loss": 90.547, "step": 95400 }, { "epoch": 0.7892625222318733, "grad_norm": 1162.3525390625, "learning_rate": 1.5143008762833112e-06, "loss": 95.5285, "step": 95410 }, { "epoch": 0.7893452454812425, "grad_norm": 1161.5557861328125, "learning_rate": 1.5132898731841689e-06, "loss": 82.0131, "step": 95420 }, { "epoch": 0.7894279687306117, "grad_norm": 785.49755859375, "learning_rate": 1.512279147506553e-06, "loss": 79.7298, "step": 95430 }, { "epoch": 0.789510691979981, "grad_norm": 1093.42333984375, "learning_rate": 1.5112686993308768e-06, "loss": 117.2086, "step": 95440 }, { "epoch": 0.7895934152293502, "grad_norm": 1063.107177734375, "learning_rate": 1.5102585287375394e-06, "loss": 89.8321, "step": 95450 }, { "epoch": 0.7896761384787194, "grad_norm": 987.2368774414062, "learning_rate": 1.5092486358069154e-06, "loss": 105.4375, "step": 95460 }, { "epoch": 0.7897588617280887, "grad_norm": 788.5397338867188, "learning_rate": 1.508239020619357e-06, "loss": 104.0471, "step": 95470 }, { "epoch": 0.7898415849774579, "grad_norm": 926.9537963867188, "learning_rate": 1.5072296832551942e-06, "loss": 72.8565, "step": 95480 }, { "epoch": 0.7899243082268271, "grad_norm": 943.4337158203125, "learning_rate": 1.5062206237947363e-06, "loss": 100.017, "step": 95490 }, { "epoch": 0.7900070314761963, "grad_norm": 588.101806640625, "learning_rate": 1.5052118423182688e-06, "loss": 69.1182, "step": 95500 }, { "epoch": 0.7900897547255656, "grad_norm": 267.3822021484375, "learning_rate": 1.5042033389060563e-06, "loss": 62.8242, "step": 95510 }, { "epoch": 0.7901724779749348, "grad_norm": 601.46875, "learning_rate": 1.5031951136383406e-06, "loss": 107.1168, "step": 95520 }, { "epoch": 0.790255201224304, "grad_norm": 795.5377197265625, "learning_rate": 1.5021871665953414e-06, "loss": 70.5595, "step": 95530 }, { "epoch": 0.7903379244736733, "grad_norm": 1379.524658203125, "learning_rate": 1.5011794978572568e-06, "loss": 84.3169, "step": 95540 }, { "epoch": 0.7904206477230425, "grad_norm": 967.1463012695312, "learning_rate": 1.500172107504262e-06, "loss": 78.8489, "step": 95550 }, { "epoch": 0.7905033709724117, "grad_norm": 608.6145629882812, "learning_rate": 1.4991649956165105e-06, "loss": 64.6662, "step": 95560 }, { "epoch": 0.790586094221781, "grad_norm": 695.8089599609375, "learning_rate": 1.4981581622741337e-06, "loss": 158.1259, "step": 95570 }, { "epoch": 0.7906688174711503, "grad_norm": 959.3106079101562, "learning_rate": 1.4971516075572407e-06, "loss": 74.8305, "step": 95580 }, { "epoch": 0.7907515407205195, "grad_norm": 1071.234375, "learning_rate": 1.4961453315459184e-06, "loss": 65.4966, "step": 95590 }, { "epoch": 0.7908342639698888, "grad_norm": 680.824951171875, "learning_rate": 1.4951393343202314e-06, "loss": 92.0542, "step": 95600 }, { "epoch": 0.790916987219258, "grad_norm": 884.766357421875, "learning_rate": 1.4941336159602227e-06, "loss": 81.2589, "step": 95610 }, { "epoch": 0.7909997104686272, "grad_norm": 1236.22900390625, "learning_rate": 1.4931281765459122e-06, "loss": 88.8313, "step": 95620 }, { "epoch": 0.7910824337179965, "grad_norm": 1996.257568359375, "learning_rate": 1.492123016157298e-06, "loss": 86.4051, "step": 95630 }, { "epoch": 0.7911651569673657, "grad_norm": 818.218994140625, "learning_rate": 1.4911181348743569e-06, "loss": 82.9814, "step": 95640 }, { "epoch": 0.7912478802167349, "grad_norm": 925.0620727539062, "learning_rate": 1.490113532777042e-06, "loss": 140.5014, "step": 95650 }, { "epoch": 0.7913306034661042, "grad_norm": 568.860595703125, "learning_rate": 1.4891092099452853e-06, "loss": 60.901, "step": 95660 }, { "epoch": 0.7914133267154734, "grad_norm": 1018.9000244140625, "learning_rate": 1.4881051664589958e-06, "loss": 94.0601, "step": 95670 }, { "epoch": 0.7914960499648426, "grad_norm": 646.0346069335938, "learning_rate": 1.4871014023980607e-06, "loss": 74.5163, "step": 95680 }, { "epoch": 0.7915787732142119, "grad_norm": 749.203857421875, "learning_rate": 1.486097917842345e-06, "loss": 76.4487, "step": 95690 }, { "epoch": 0.7916614964635811, "grad_norm": 519.8616333007812, "learning_rate": 1.4850947128716914e-06, "loss": 76.2586, "step": 95700 }, { "epoch": 0.7917442197129503, "grad_norm": 1079.2755126953125, "learning_rate": 1.4840917875659206e-06, "loss": 85.6422, "step": 95710 }, { "epoch": 0.7918269429623196, "grad_norm": 606.11474609375, "learning_rate": 1.4830891420048298e-06, "loss": 91.0621, "step": 95720 }, { "epoch": 0.7919096662116888, "grad_norm": 1178.3045654296875, "learning_rate": 1.482086776268196e-06, "loss": 99.66, "step": 95730 }, { "epoch": 0.791992389461058, "grad_norm": 980.57763671875, "learning_rate": 1.4810846904357722e-06, "loss": 96.6671, "step": 95740 }, { "epoch": 0.7920751127104273, "grad_norm": 1058.7889404296875, "learning_rate": 1.48008288458729e-06, "loss": 109.5006, "step": 95750 }, { "epoch": 0.7921578359597965, "grad_norm": 529.0836181640625, "learning_rate": 1.4790813588024584e-06, "loss": 88.0496, "step": 95760 }, { "epoch": 0.7922405592091657, "grad_norm": 1165.0322265625, "learning_rate": 1.4780801131609657e-06, "loss": 111.1433, "step": 95770 }, { "epoch": 0.792323282458535, "grad_norm": 773.7676391601562, "learning_rate": 1.477079147742474e-06, "loss": 106.7083, "step": 95780 }, { "epoch": 0.7924060057079042, "grad_norm": 863.2052612304688, "learning_rate": 1.476078462626624e-06, "loss": 83.5902, "step": 95790 }, { "epoch": 0.7924887289572734, "grad_norm": 709.7434692382812, "learning_rate": 1.4750780578930402e-06, "loss": 66.4846, "step": 95800 }, { "epoch": 0.7925714522066427, "grad_norm": 703.7163696289062, "learning_rate": 1.4740779336213178e-06, "loss": 86.0505, "step": 95810 }, { "epoch": 0.7926541754560119, "grad_norm": 1240.612060546875, "learning_rate": 1.4730780898910329e-06, "loss": 108.2829, "step": 95820 }, { "epoch": 0.7927368987053811, "grad_norm": 702.4696655273438, "learning_rate": 1.4720785267817378e-06, "loss": 59.4141, "step": 95830 }, { "epoch": 0.7928196219547504, "grad_norm": 966.9011840820312, "learning_rate": 1.4710792443729633e-06, "loss": 112.7589, "step": 95840 }, { "epoch": 0.7929023452041196, "grad_norm": 1277.8905029296875, "learning_rate": 1.470080242744218e-06, "loss": 72.1038, "step": 95850 }, { "epoch": 0.7929850684534888, "grad_norm": 1504.42529296875, "learning_rate": 1.4690815219749887e-06, "loss": 88.3772, "step": 95860 }, { "epoch": 0.7930677917028581, "grad_norm": 663.6552734375, "learning_rate": 1.4680830821447368e-06, "loss": 80.6271, "step": 95870 }, { "epoch": 0.7931505149522273, "grad_norm": 686.2540283203125, "learning_rate": 1.4670849233329032e-06, "loss": 85.9933, "step": 95880 }, { "epoch": 0.7932332382015965, "grad_norm": 648.2404174804688, "learning_rate": 1.4660870456189098e-06, "loss": 91.8078, "step": 95890 }, { "epoch": 0.7933159614509658, "grad_norm": 1184.7274169921875, "learning_rate": 1.4650894490821515e-06, "loss": 105.1896, "step": 95900 }, { "epoch": 0.793398684700335, "grad_norm": 900.9071655273438, "learning_rate": 1.464092133802003e-06, "loss": 95.3142, "step": 95910 }, { "epoch": 0.7934814079497042, "grad_norm": 786.411865234375, "learning_rate": 1.4630950998578158e-06, "loss": 71.5299, "step": 95920 }, { "epoch": 0.7935641311990735, "grad_norm": 537.1038818359375, "learning_rate": 1.4620983473289192e-06, "loss": 106.0131, "step": 95930 }, { "epoch": 0.7936468544484427, "grad_norm": 907.1212158203125, "learning_rate": 1.4611018762946217e-06, "loss": 67.942, "step": 95940 }, { "epoch": 0.7937295776978119, "grad_norm": 532.5782470703125, "learning_rate": 1.4601056868342051e-06, "loss": 77.3442, "step": 95950 }, { "epoch": 0.7938123009471812, "grad_norm": 1575.781005859375, "learning_rate": 1.4591097790269333e-06, "loss": 121.2645, "step": 95960 }, { "epoch": 0.7938950241965504, "grad_norm": 569.5699462890625, "learning_rate": 1.4581141529520455e-06, "loss": 100.5707, "step": 95970 }, { "epoch": 0.7939777474459196, "grad_norm": 680.08251953125, "learning_rate": 1.4571188086887583e-06, "loss": 87.8128, "step": 95980 }, { "epoch": 0.794060470695289, "grad_norm": 462.587890625, "learning_rate": 1.4561237463162693e-06, "loss": 92.0846, "step": 95990 }, { "epoch": 0.7941431939446582, "grad_norm": 552.5233154296875, "learning_rate": 1.4551289659137497e-06, "loss": 64.9093, "step": 96000 }, { "epoch": 0.7942259171940274, "grad_norm": 878.8954467773438, "learning_rate": 1.454134467560349e-06, "loss": 90.1892, "step": 96010 }, { "epoch": 0.7943086404433967, "grad_norm": 1012.231689453125, "learning_rate": 1.453140251335196e-06, "loss": 85.0141, "step": 96020 }, { "epoch": 0.7943913636927659, "grad_norm": 762.498291015625, "learning_rate": 1.4521463173173966e-06, "loss": 85.905, "step": 96030 }, { "epoch": 0.7944740869421351, "grad_norm": 857.3758544921875, "learning_rate": 1.4511526655860309e-06, "loss": 94.0118, "step": 96040 }, { "epoch": 0.7945568101915044, "grad_norm": 752.3488159179688, "learning_rate": 1.4501592962201604e-06, "loss": 79.3199, "step": 96050 }, { "epoch": 0.7946395334408736, "grad_norm": 1673.278564453125, "learning_rate": 1.4491662092988234e-06, "loss": 145.9322, "step": 96060 }, { "epoch": 0.7947222566902428, "grad_norm": 1270.6055908203125, "learning_rate": 1.448173404901035e-06, "loss": 113.5627, "step": 96070 }, { "epoch": 0.7948049799396121, "grad_norm": 464.31982421875, "learning_rate": 1.447180883105786e-06, "loss": 75.3027, "step": 96080 }, { "epoch": 0.7948877031889813, "grad_norm": 786.0927124023438, "learning_rate": 1.446188643992051e-06, "loss": 95.312, "step": 96090 }, { "epoch": 0.7949704264383505, "grad_norm": 571.1138916015625, "learning_rate": 1.4451966876387752e-06, "loss": 61.4562, "step": 96100 }, { "epoch": 0.7950531496877198, "grad_norm": 684.5860595703125, "learning_rate": 1.4442050141248853e-06, "loss": 63.8922, "step": 96110 }, { "epoch": 0.795135872937089, "grad_norm": 753.0285034179688, "learning_rate": 1.4432136235292848e-06, "loss": 78.2068, "step": 96120 }, { "epoch": 0.7952185961864582, "grad_norm": 1021.8491821289062, "learning_rate": 1.442222515930851e-06, "loss": 99.4391, "step": 96130 }, { "epoch": 0.7953013194358275, "grad_norm": 650.785888671875, "learning_rate": 1.441231691408444e-06, "loss": 96.3516, "step": 96140 }, { "epoch": 0.7953840426851967, "grad_norm": 446.81951904296875, "learning_rate": 1.4402411500408985e-06, "loss": 60.6617, "step": 96150 }, { "epoch": 0.7954667659345659, "grad_norm": 918.9302978515625, "learning_rate": 1.439250891907028e-06, "loss": 92.1007, "step": 96160 }, { "epoch": 0.7955494891839352, "grad_norm": 1528.4639892578125, "learning_rate": 1.4382609170856222e-06, "loss": 94.0727, "step": 96170 }, { "epoch": 0.7956322124333044, "grad_norm": 991.0545654296875, "learning_rate": 1.4372712256554471e-06, "loss": 74.275, "step": 96180 }, { "epoch": 0.7957149356826736, "grad_norm": 408.3299560546875, "learning_rate": 1.436281817695252e-06, "loss": 93.5512, "step": 96190 }, { "epoch": 0.7957976589320428, "grad_norm": 305.2838439941406, "learning_rate": 1.4352926932837591e-06, "loss": 84.8283, "step": 96200 }, { "epoch": 0.7958803821814121, "grad_norm": 1160.2215576171875, "learning_rate": 1.4343038524996645e-06, "loss": 73.67, "step": 96210 }, { "epoch": 0.7959631054307813, "grad_norm": 838.9818725585938, "learning_rate": 1.4333152954216483e-06, "loss": 85.1046, "step": 96220 }, { "epoch": 0.7960458286801505, "grad_norm": 1157.9669189453125, "learning_rate": 1.4323270221283653e-06, "loss": 96.3205, "step": 96230 }, { "epoch": 0.7961285519295198, "grad_norm": 1076.052978515625, "learning_rate": 1.4313390326984478e-06, "loss": 111.263, "step": 96240 }, { "epoch": 0.796211275178889, "grad_norm": 715.1856689453125, "learning_rate": 1.4303513272105057e-06, "loss": 78.668, "step": 96250 }, { "epoch": 0.7962939984282582, "grad_norm": 793.4249877929688, "learning_rate": 1.4293639057431258e-06, "loss": 78.2108, "step": 96260 }, { "epoch": 0.7963767216776275, "grad_norm": 841.6565551757812, "learning_rate": 1.4283767683748711e-06, "loss": 103.3483, "step": 96270 }, { "epoch": 0.7964594449269967, "grad_norm": 1000.8982543945312, "learning_rate": 1.4273899151842873e-06, "loss": 90.5414, "step": 96280 }, { "epoch": 0.7965421681763659, "grad_norm": 751.5543212890625, "learning_rate": 1.4264033462498932e-06, "loss": 72.9708, "step": 96290 }, { "epoch": 0.7966248914257352, "grad_norm": 768.4910278320312, "learning_rate": 1.4254170616501828e-06, "loss": 84.1671, "step": 96300 }, { "epoch": 0.7967076146751044, "grad_norm": 1033.3453369140625, "learning_rate": 1.424431061463632e-06, "loss": 81.5555, "step": 96310 }, { "epoch": 0.7967903379244736, "grad_norm": 957.588623046875, "learning_rate": 1.4234453457686914e-06, "loss": 105.335, "step": 96320 }, { "epoch": 0.7968730611738429, "grad_norm": 1199.724365234375, "learning_rate": 1.4224599146437906e-06, "loss": 87.4864, "step": 96330 }, { "epoch": 0.7969557844232121, "grad_norm": 529.3333740234375, "learning_rate": 1.4214747681673362e-06, "loss": 83.0788, "step": 96340 }, { "epoch": 0.7970385076725813, "grad_norm": 629.4002075195312, "learning_rate": 1.4204899064177107e-06, "loss": 89.5744, "step": 96350 }, { "epoch": 0.7971212309219506, "grad_norm": 1154.12255859375, "learning_rate": 1.4195053294732757e-06, "loss": 110.1019, "step": 96360 }, { "epoch": 0.7972039541713198, "grad_norm": 886.4882202148438, "learning_rate": 1.4185210374123698e-06, "loss": 64.0015, "step": 96370 }, { "epoch": 0.797286677420689, "grad_norm": 1252.8187255859375, "learning_rate": 1.417537030313308e-06, "loss": 88.5995, "step": 96380 }, { "epoch": 0.7973694006700583, "grad_norm": 1106.0509033203125, "learning_rate": 1.416553308254383e-06, "loss": 107.5624, "step": 96390 }, { "epoch": 0.7974521239194275, "grad_norm": 608.2451782226562, "learning_rate": 1.415569871313866e-06, "loss": 101.158, "step": 96400 }, { "epoch": 0.7975348471687967, "grad_norm": 2912.268798828125, "learning_rate": 1.4145867195700036e-06, "loss": 84.9362, "step": 96410 }, { "epoch": 0.797617570418166, "grad_norm": 1384.496337890625, "learning_rate": 1.4136038531010216e-06, "loss": 102.7532, "step": 96420 }, { "epoch": 0.7977002936675353, "grad_norm": 765.7507934570312, "learning_rate": 1.4126212719851211e-06, "loss": 101.0161, "step": 96430 }, { "epoch": 0.7977830169169045, "grad_norm": 559.5115356445312, "learning_rate": 1.411638976300483e-06, "loss": 59.2422, "step": 96440 }, { "epoch": 0.7978657401662738, "grad_norm": 683.1123046875, "learning_rate": 1.4106569661252623e-06, "loss": 78.2508, "step": 96450 }, { "epoch": 0.797948463415643, "grad_norm": 1658.7459716796875, "learning_rate": 1.4096752415375941e-06, "loss": 71.4444, "step": 96460 }, { "epoch": 0.7980311866650122, "grad_norm": 790.0877075195312, "learning_rate": 1.40869380261559e-06, "loss": 91.7176, "step": 96470 }, { "epoch": 0.7981139099143815, "grad_norm": 841.804931640625, "learning_rate": 1.4077126494373379e-06, "loss": 81.0083, "step": 96480 }, { "epoch": 0.7981966331637507, "grad_norm": 936.5332641601562, "learning_rate": 1.406731782080904e-06, "loss": 73.6172, "step": 96490 }, { "epoch": 0.7982793564131199, "grad_norm": 620.3352661132812, "learning_rate": 1.4057512006243312e-06, "loss": 113.0523, "step": 96500 }, { "epoch": 0.7983620796624892, "grad_norm": 785.4129638671875, "learning_rate": 1.4047709051456398e-06, "loss": 82.846, "step": 96510 }, { "epoch": 0.7984448029118584, "grad_norm": 1122.926025390625, "learning_rate": 1.403790895722828e-06, "loss": 73.4351, "step": 96520 }, { "epoch": 0.7985275261612276, "grad_norm": 1235.810546875, "learning_rate": 1.40281117243387e-06, "loss": 113.9662, "step": 96530 }, { "epoch": 0.7986102494105969, "grad_norm": 608.0697021484375, "learning_rate": 1.4018317353567185e-06, "loss": 91.6859, "step": 96540 }, { "epoch": 0.7986929726599661, "grad_norm": 847.255615234375, "learning_rate": 1.4008525845693022e-06, "loss": 103.3336, "step": 96550 }, { "epoch": 0.7987756959093353, "grad_norm": 1009.511474609375, "learning_rate": 1.3998737201495277e-06, "loss": 79.1723, "step": 96560 }, { "epoch": 0.7988584191587046, "grad_norm": 701.9151611328125, "learning_rate": 1.3988951421752789e-06, "loss": 81.0391, "step": 96570 }, { "epoch": 0.7989411424080738, "grad_norm": 777.0698852539062, "learning_rate": 1.3979168507244172e-06, "loss": 94.3455, "step": 96580 }, { "epoch": 0.799023865657443, "grad_norm": 523.3616943359375, "learning_rate": 1.3969388458747802e-06, "loss": 78.6874, "step": 96590 }, { "epoch": 0.7991065889068123, "grad_norm": 876.171142578125, "learning_rate": 1.3959611277041834e-06, "loss": 85.3403, "step": 96600 }, { "epoch": 0.7991893121561815, "grad_norm": 538.2999267578125, "learning_rate": 1.394983696290419e-06, "loss": 94.9753, "step": 96610 }, { "epoch": 0.7992720354055507, "grad_norm": 890.1272583007812, "learning_rate": 1.3940065517112579e-06, "loss": 90.0231, "step": 96620 }, { "epoch": 0.79935475865492, "grad_norm": 1206.276123046875, "learning_rate": 1.3930296940444472e-06, "loss": 85.5307, "step": 96630 }, { "epoch": 0.7994374819042892, "grad_norm": 1130.0594482421875, "learning_rate": 1.392053123367707e-06, "loss": 89.702, "step": 96640 }, { "epoch": 0.7995202051536584, "grad_norm": 827.4190063476562, "learning_rate": 1.3910768397587427e-06, "loss": 137.461, "step": 96650 }, { "epoch": 0.7996029284030277, "grad_norm": 794.8823852539062, "learning_rate": 1.3901008432952323e-06, "loss": 111.7997, "step": 96660 }, { "epoch": 0.7996856516523969, "grad_norm": 721.3433837890625, "learning_rate": 1.38912513405483e-06, "loss": 81.388, "step": 96670 }, { "epoch": 0.7997683749017661, "grad_norm": 998.39990234375, "learning_rate": 1.3881497121151694e-06, "loss": 63.9328, "step": 96680 }, { "epoch": 0.7998510981511354, "grad_norm": 1102.243408203125, "learning_rate": 1.3871745775538598e-06, "loss": 95.1184, "step": 96690 }, { "epoch": 0.7999338214005046, "grad_norm": 737.71240234375, "learning_rate": 1.3861997304484887e-06, "loss": 87.6006, "step": 96700 }, { "epoch": 0.8000165446498738, "grad_norm": 1060.644775390625, "learning_rate": 1.3852251708766195e-06, "loss": 82.6589, "step": 96710 }, { "epoch": 0.8000992678992431, "grad_norm": 946.8339233398438, "learning_rate": 1.384250898915796e-06, "loss": 100.851, "step": 96720 }, { "epoch": 0.8001819911486123, "grad_norm": 1012.8794555664062, "learning_rate": 1.3832769146435327e-06, "loss": 131.0507, "step": 96730 }, { "epoch": 0.8002647143979815, "grad_norm": 1018.9071655273438, "learning_rate": 1.3823032181373253e-06, "loss": 84.4321, "step": 96740 }, { "epoch": 0.8003474376473508, "grad_norm": 2128.4169921875, "learning_rate": 1.3813298094746491e-06, "loss": 97.8866, "step": 96750 }, { "epoch": 0.80043016089672, "grad_norm": 691.6203002929688, "learning_rate": 1.3803566887329528e-06, "loss": 69.4865, "step": 96760 }, { "epoch": 0.8005128841460892, "grad_norm": 894.628662109375, "learning_rate": 1.3793838559896628e-06, "loss": 97.1931, "step": 96770 }, { "epoch": 0.8005956073954585, "grad_norm": 844.1419067382812, "learning_rate": 1.3784113113221826e-06, "loss": 85.3941, "step": 96780 }, { "epoch": 0.8006783306448277, "grad_norm": 966.1434326171875, "learning_rate": 1.3774390548078942e-06, "loss": 91.669, "step": 96790 }, { "epoch": 0.8007610538941969, "grad_norm": 842.3434448242188, "learning_rate": 1.3764670865241557e-06, "loss": 80.8415, "step": 96800 }, { "epoch": 0.8008437771435662, "grad_norm": 607.7752685546875, "learning_rate": 1.3754954065483006e-06, "loss": 110.9996, "step": 96810 }, { "epoch": 0.8009265003929354, "grad_norm": 1030.109130859375, "learning_rate": 1.374524014957641e-06, "loss": 79.0626, "step": 96820 }, { "epoch": 0.8010092236423046, "grad_norm": 892.157470703125, "learning_rate": 1.373552911829466e-06, "loss": 96.9693, "step": 96830 }, { "epoch": 0.801091946891674, "grad_norm": 1113.1424560546875, "learning_rate": 1.3725820972410437e-06, "loss": 83.1945, "step": 96840 }, { "epoch": 0.8011746701410432, "grad_norm": 726.9800415039062, "learning_rate": 1.3716115712696166e-06, "loss": 92.1727, "step": 96850 }, { "epoch": 0.8012573933904124, "grad_norm": 1125.0447998046875, "learning_rate": 1.3706413339924047e-06, "loss": 102.9361, "step": 96860 }, { "epoch": 0.8013401166397817, "grad_norm": 452.7906799316406, "learning_rate": 1.369671385486605e-06, "loss": 91.2214, "step": 96870 }, { "epoch": 0.8014228398891509, "grad_norm": 509.42926025390625, "learning_rate": 1.3687017258293928e-06, "loss": 82.8149, "step": 96880 }, { "epoch": 0.8015055631385201, "grad_norm": 1304.8404541015625, "learning_rate": 1.36773235509792e-06, "loss": 123.0008, "step": 96890 }, { "epoch": 0.8015882863878894, "grad_norm": 618.9688720703125, "learning_rate": 1.366763273369312e-06, "loss": 85.5639, "step": 96900 }, { "epoch": 0.8016710096372586, "grad_norm": 613.8262329101562, "learning_rate": 1.3657944807206764e-06, "loss": 64.9996, "step": 96910 }, { "epoch": 0.8017537328866278, "grad_norm": 841.429931640625, "learning_rate": 1.3648259772290957e-06, "loss": 109.687, "step": 96920 }, { "epoch": 0.801836456135997, "grad_norm": 544.7089233398438, "learning_rate": 1.3638577629716265e-06, "loss": 68.7118, "step": 96930 }, { "epoch": 0.8019191793853663, "grad_norm": 917.5126342773438, "learning_rate": 1.3628898380253092e-06, "loss": 85.4107, "step": 96940 }, { "epoch": 0.8020019026347355, "grad_norm": 790.5340576171875, "learning_rate": 1.3619222024671557e-06, "loss": 83.7136, "step": 96950 }, { "epoch": 0.8020846258841047, "grad_norm": 1110.8216552734375, "learning_rate": 1.360954856374156e-06, "loss": 105.1742, "step": 96960 }, { "epoch": 0.802167349133474, "grad_norm": 620.2880859375, "learning_rate": 1.3599877998232768e-06, "loss": 78.4277, "step": 96970 }, { "epoch": 0.8022500723828432, "grad_norm": 1336.2269287109375, "learning_rate": 1.359021032891465e-06, "loss": 87.1557, "step": 96980 }, { "epoch": 0.8023327956322124, "grad_norm": 723.6621704101562, "learning_rate": 1.358054555655638e-06, "loss": 84.3839, "step": 96990 }, { "epoch": 0.8024155188815817, "grad_norm": 838.581298828125, "learning_rate": 1.357088368192696e-06, "loss": 111.4981, "step": 97000 }, { "epoch": 0.8024982421309509, "grad_norm": 564.4741821289062, "learning_rate": 1.356122470579514e-06, "loss": 67.0325, "step": 97010 }, { "epoch": 0.8025809653803201, "grad_norm": 722.4137573242188, "learning_rate": 1.3551568628929434e-06, "loss": 66.7703, "step": 97020 }, { "epoch": 0.8026636886296894, "grad_norm": 958.3905029296875, "learning_rate": 1.3541915452098126e-06, "loss": 101.0116, "step": 97030 }, { "epoch": 0.8027464118790586, "grad_norm": 1039.2890625, "learning_rate": 1.3532265176069298e-06, "loss": 88.899, "step": 97040 }, { "epoch": 0.8028291351284278, "grad_norm": 715.7427368164062, "learning_rate": 1.3522617801610767e-06, "loss": 91.0962, "step": 97050 }, { "epoch": 0.8029118583777971, "grad_norm": 973.7153930664062, "learning_rate": 1.3512973329490137e-06, "loss": 88.5474, "step": 97060 }, { "epoch": 0.8029945816271663, "grad_norm": 1014.022705078125, "learning_rate": 1.3503331760474759e-06, "loss": 78.5856, "step": 97070 }, { "epoch": 0.8030773048765355, "grad_norm": 1352.9793701171875, "learning_rate": 1.3493693095331773e-06, "loss": 103.864, "step": 97080 }, { "epoch": 0.8031600281259048, "grad_norm": 384.0733642578125, "learning_rate": 1.3484057334828088e-06, "loss": 79.577, "step": 97090 }, { "epoch": 0.803242751375274, "grad_norm": 1254.0048828125, "learning_rate": 1.3474424479730375e-06, "loss": 108.2203, "step": 97100 }, { "epoch": 0.8033254746246432, "grad_norm": 641.9199829101562, "learning_rate": 1.3464794530805076e-06, "loss": 83.1312, "step": 97110 }, { "epoch": 0.8034081978740125, "grad_norm": 770.1970825195312, "learning_rate": 1.34551674888184e-06, "loss": 89.1073, "step": 97120 }, { "epoch": 0.8034909211233817, "grad_norm": 795.2720947265625, "learning_rate": 1.3445543354536317e-06, "loss": 134.2826, "step": 97130 }, { "epoch": 0.8035736443727509, "grad_norm": 371.7003479003906, "learning_rate": 1.3435922128724599e-06, "loss": 85.9229, "step": 97140 }, { "epoch": 0.8036563676221202, "grad_norm": 1355.1334228515625, "learning_rate": 1.3426303812148766e-06, "loss": 116.6411, "step": 97150 }, { "epoch": 0.8037390908714894, "grad_norm": 1486.0423583984375, "learning_rate": 1.3416688405574074e-06, "loss": 83.7708, "step": 97160 }, { "epoch": 0.8038218141208586, "grad_norm": 490.289794921875, "learning_rate": 1.3407075909765593e-06, "loss": 103.2564, "step": 97170 }, { "epoch": 0.8039045373702279, "grad_norm": 699.4607543945312, "learning_rate": 1.339746632548814e-06, "loss": 106.1111, "step": 97180 }, { "epoch": 0.8039872606195971, "grad_norm": 749.7698974609375, "learning_rate": 1.3387859653506314e-06, "loss": 96.1973, "step": 97190 }, { "epoch": 0.8040699838689663, "grad_norm": 257.79119873046875, "learning_rate": 1.3378255894584463e-06, "loss": 93.4603, "step": 97200 }, { "epoch": 0.8041527071183356, "grad_norm": 482.4723815917969, "learning_rate": 1.3368655049486717e-06, "loss": 114.9346, "step": 97210 }, { "epoch": 0.8042354303677048, "grad_norm": 308.8788757324219, "learning_rate": 1.3359057118976976e-06, "loss": 88.0903, "step": 97220 }, { "epoch": 0.804318153617074, "grad_norm": 932.05419921875, "learning_rate": 1.3349462103818906e-06, "loss": 76.6318, "step": 97230 }, { "epoch": 0.8044008768664433, "grad_norm": 3546.006103515625, "learning_rate": 1.3339870004775929e-06, "loss": 82.4357, "step": 97240 }, { "epoch": 0.8044836001158125, "grad_norm": 689.85302734375, "learning_rate": 1.3330280822611246e-06, "loss": 94.2522, "step": 97250 }, { "epoch": 0.8045663233651817, "grad_norm": 322.86669921875, "learning_rate": 1.3320694558087832e-06, "loss": 76.2741, "step": 97260 }, { "epoch": 0.8046490466145511, "grad_norm": 1147.7379150390625, "learning_rate": 1.3311111211968414e-06, "loss": 87.9512, "step": 97270 }, { "epoch": 0.8047317698639203, "grad_norm": 778.215087890625, "learning_rate": 1.3301530785015492e-06, "loss": 68.0114, "step": 97280 }, { "epoch": 0.8048144931132895, "grad_norm": 918.3253173828125, "learning_rate": 1.329195327799135e-06, "loss": 85.8742, "step": 97290 }, { "epoch": 0.8048972163626588, "grad_norm": 1470.5067138671875, "learning_rate": 1.3282378691658015e-06, "loss": 80.2126, "step": 97300 }, { "epoch": 0.804979939612028, "grad_norm": 682.7937622070312, "learning_rate": 1.3272807026777302e-06, "loss": 77.8562, "step": 97310 }, { "epoch": 0.8050626628613972, "grad_norm": 711.0369262695312, "learning_rate": 1.3263238284110769e-06, "loss": 88.1487, "step": 97320 }, { "epoch": 0.8051453861107665, "grad_norm": 711.9708251953125, "learning_rate": 1.3253672464419776e-06, "loss": 80.779, "step": 97330 }, { "epoch": 0.8052281093601357, "grad_norm": 892.573486328125, "learning_rate": 1.3244109568465414e-06, "loss": 106.5878, "step": 97340 }, { "epoch": 0.8053108326095049, "grad_norm": 895.7553100585938, "learning_rate": 1.3234549597008572e-06, "loss": 84.2314, "step": 97350 }, { "epoch": 0.8053935558588742, "grad_norm": 486.6683349609375, "learning_rate": 1.322499255080989e-06, "loss": 66.8342, "step": 97360 }, { "epoch": 0.8054762791082434, "grad_norm": 531.82763671875, "learning_rate": 1.3215438430629774e-06, "loss": 66.0651, "step": 97370 }, { "epoch": 0.8055590023576126, "grad_norm": 902.6575317382812, "learning_rate": 1.3205887237228399e-06, "loss": 80.48, "step": 97380 }, { "epoch": 0.8056417256069819, "grad_norm": 784.8746948242188, "learning_rate": 1.3196338971365719e-06, "loss": 80.4638, "step": 97390 }, { "epoch": 0.8057244488563511, "grad_norm": 715.1857299804688, "learning_rate": 1.3186793633801443e-06, "loss": 91.9943, "step": 97400 }, { "epoch": 0.8058071721057203, "grad_norm": 620.5697631835938, "learning_rate": 1.317725122529504e-06, "loss": 79.546, "step": 97410 }, { "epoch": 0.8058898953550896, "grad_norm": 688.434326171875, "learning_rate": 1.3167711746605771e-06, "loss": 88.8246, "step": 97420 }, { "epoch": 0.8059726186044588, "grad_norm": 629.0154418945312, "learning_rate": 1.315817519849264e-06, "loss": 78.7018, "step": 97430 }, { "epoch": 0.806055341853828, "grad_norm": 568.8397216796875, "learning_rate": 1.3148641581714421e-06, "loss": 64.2857, "step": 97440 }, { "epoch": 0.8061380651031973, "grad_norm": 1156.6614990234375, "learning_rate": 1.3139110897029672e-06, "loss": 80.8269, "step": 97450 }, { "epoch": 0.8062207883525665, "grad_norm": 939.9249267578125, "learning_rate": 1.3129583145196701e-06, "loss": 98.3824, "step": 97460 }, { "epoch": 0.8063035116019357, "grad_norm": 677.0330810546875, "learning_rate": 1.3120058326973583e-06, "loss": 80.2276, "step": 97470 }, { "epoch": 0.806386234851305, "grad_norm": 733.0352172851562, "learning_rate": 1.3110536443118172e-06, "loss": 100.2774, "step": 97480 }, { "epoch": 0.8064689581006742, "grad_norm": 895.4539794921875, "learning_rate": 1.3101017494388074e-06, "loss": 88.9246, "step": 97490 }, { "epoch": 0.8065516813500434, "grad_norm": 1752.5474853515625, "learning_rate": 1.3091501481540676e-06, "loss": 83.0293, "step": 97500 }, { "epoch": 0.8066344045994127, "grad_norm": 781.7203979492188, "learning_rate": 1.3081988405333106e-06, "loss": 95.9738, "step": 97510 }, { "epoch": 0.8067171278487819, "grad_norm": 1117.3587646484375, "learning_rate": 1.3072478266522298e-06, "loss": 97.9509, "step": 97520 }, { "epoch": 0.8067998510981511, "grad_norm": 1128.07177734375, "learning_rate": 1.3062971065864915e-06, "loss": 90.2064, "step": 97530 }, { "epoch": 0.8068825743475204, "grad_norm": 563.0820922851562, "learning_rate": 1.305346680411741e-06, "loss": 97.338, "step": 97540 }, { "epoch": 0.8069652975968896, "grad_norm": 1097.416748046875, "learning_rate": 1.3043965482035987e-06, "loss": 94.4323, "step": 97550 }, { "epoch": 0.8070480208462588, "grad_norm": 1162.277587890625, "learning_rate": 1.3034467100376624e-06, "loss": 112.2583, "step": 97560 }, { "epoch": 0.8071307440956281, "grad_norm": 708.5651245117188, "learning_rate": 1.3024971659895069e-06, "loss": 72.2733, "step": 97570 }, { "epoch": 0.8072134673449973, "grad_norm": 877.0707397460938, "learning_rate": 1.3015479161346839e-06, "loss": 72.6177, "step": 97580 }, { "epoch": 0.8072961905943665, "grad_norm": 622.7434692382812, "learning_rate": 1.3005989605487168e-06, "loss": 85.8667, "step": 97590 }, { "epoch": 0.8073789138437358, "grad_norm": 1124.309326171875, "learning_rate": 1.2996502993071137e-06, "loss": 128.3939, "step": 97600 }, { "epoch": 0.807461637093105, "grad_norm": 959.4961547851562, "learning_rate": 1.2987019324853539e-06, "loss": 86.4192, "step": 97610 }, { "epoch": 0.8075443603424742, "grad_norm": 926.9354248046875, "learning_rate": 1.2977538601588951e-06, "loss": 83.3312, "step": 97620 }, { "epoch": 0.8076270835918435, "grad_norm": 765.6702270507812, "learning_rate": 1.2968060824031704e-06, "loss": 100.2946, "step": 97630 }, { "epoch": 0.8077098068412127, "grad_norm": 663.2040405273438, "learning_rate": 1.29585859929359e-06, "loss": 92.4886, "step": 97640 }, { "epoch": 0.8077925300905819, "grad_norm": 1108.396484375, "learning_rate": 1.2949114109055417e-06, "loss": 64.0099, "step": 97650 }, { "epoch": 0.8078752533399511, "grad_norm": 627.171142578125, "learning_rate": 1.2939645173143894e-06, "loss": 86.472, "step": 97660 }, { "epoch": 0.8079579765893204, "grad_norm": 2170.154541015625, "learning_rate": 1.293017918595471e-06, "loss": 131.4853, "step": 97670 }, { "epoch": 0.8080406998386896, "grad_norm": 752.8032836914062, "learning_rate": 1.2920716148241036e-06, "loss": 90.6649, "step": 97680 }, { "epoch": 0.8081234230880588, "grad_norm": 707.9442138671875, "learning_rate": 1.2911256060755794e-06, "loss": 95.8097, "step": 97690 }, { "epoch": 0.8082061463374282, "grad_norm": 716.3200073242188, "learning_rate": 1.2901798924251712e-06, "loss": 83.3109, "step": 97700 }, { "epoch": 0.8082888695867974, "grad_norm": 769.82763671875, "learning_rate": 1.2892344739481228e-06, "loss": 91.1391, "step": 97710 }, { "epoch": 0.8083715928361666, "grad_norm": 720.6710205078125, "learning_rate": 1.288289350719657e-06, "loss": 69.3394, "step": 97720 }, { "epoch": 0.8084543160855359, "grad_norm": 1892.12451171875, "learning_rate": 1.2873445228149733e-06, "loss": 99.0175, "step": 97730 }, { "epoch": 0.8085370393349051, "grad_norm": 1591.5841064453125, "learning_rate": 1.2863999903092473e-06, "loss": 87.3172, "step": 97740 }, { "epoch": 0.8086197625842743, "grad_norm": 2348.568603515625, "learning_rate": 1.2854557532776323e-06, "loss": 88.4138, "step": 97750 }, { "epoch": 0.8087024858336436, "grad_norm": 3484.744873046875, "learning_rate": 1.2845118117952544e-06, "loss": 120.2466, "step": 97760 }, { "epoch": 0.8087852090830128, "grad_norm": 1318.188720703125, "learning_rate": 1.2835681659372196e-06, "loss": 66.7873, "step": 97770 }, { "epoch": 0.808867932332382, "grad_norm": 612.43408203125, "learning_rate": 1.282624815778608e-06, "loss": 92.2864, "step": 97780 }, { "epoch": 0.8089506555817513, "grad_norm": 984.0529174804688, "learning_rate": 1.281681761394481e-06, "loss": 67.4189, "step": 97790 }, { "epoch": 0.8090333788311205, "grad_norm": 486.3183288574219, "learning_rate": 1.2807390028598715e-06, "loss": 81.0175, "step": 97800 }, { "epoch": 0.8091161020804897, "grad_norm": 899.5704956054688, "learning_rate": 1.2797965402497902e-06, "loss": 67.1271, "step": 97810 }, { "epoch": 0.809198825329859, "grad_norm": 440.5632019042969, "learning_rate": 1.278854373639225e-06, "loss": 75.3582, "step": 97820 }, { "epoch": 0.8092815485792282, "grad_norm": 1131.36083984375, "learning_rate": 1.2779125031031413e-06, "loss": 95.9433, "step": 97830 }, { "epoch": 0.8093642718285974, "grad_norm": 1148.0606689453125, "learning_rate": 1.2769709287164755e-06, "loss": 91.1547, "step": 97840 }, { "epoch": 0.8094469950779667, "grad_norm": 1007.1502685546875, "learning_rate": 1.2760296505541469e-06, "loss": 128.5402, "step": 97850 }, { "epoch": 0.8095297183273359, "grad_norm": 675.3720703125, "learning_rate": 1.2750886686910485e-06, "loss": 82.86, "step": 97860 }, { "epoch": 0.8096124415767051, "grad_norm": 554.8408813476562, "learning_rate": 1.2741479832020492e-06, "loss": 103.5147, "step": 97870 }, { "epoch": 0.8096951648260744, "grad_norm": 1001.0286865234375, "learning_rate": 1.2732075941619948e-06, "loss": 82.1101, "step": 97880 }, { "epoch": 0.8097778880754436, "grad_norm": 778.3543090820312, "learning_rate": 1.2722675016457091e-06, "loss": 81.8874, "step": 97890 }, { "epoch": 0.8098606113248128, "grad_norm": 948.7130737304688, "learning_rate": 1.271327705727991e-06, "loss": 99.7648, "step": 97900 }, { "epoch": 0.8099433345741821, "grad_norm": 3270.607421875, "learning_rate": 1.2703882064836142e-06, "loss": 103.626, "step": 97910 }, { "epoch": 0.8100260578235513, "grad_norm": 953.6922607421875, "learning_rate": 1.2694490039873336e-06, "loss": 93.171, "step": 97920 }, { "epoch": 0.8101087810729205, "grad_norm": 762.2303466796875, "learning_rate": 1.2685100983138731e-06, "loss": 63.3839, "step": 97930 }, { "epoch": 0.8101915043222898, "grad_norm": 976.2236328125, "learning_rate": 1.2675714895379387e-06, "loss": 94.6773, "step": 97940 }, { "epoch": 0.810274227571659, "grad_norm": 606.363525390625, "learning_rate": 1.2666331777342112e-06, "loss": 81.9096, "step": 97950 }, { "epoch": 0.8103569508210282, "grad_norm": 1317.268798828125, "learning_rate": 1.265695162977348e-06, "loss": 93.898, "step": 97960 }, { "epoch": 0.8104396740703975, "grad_norm": 738.7415161132812, "learning_rate": 1.2647574453419826e-06, "loss": 63.2881, "step": 97970 }, { "epoch": 0.8105223973197667, "grad_norm": 1187.66162109375, "learning_rate": 1.2638200249027233e-06, "loss": 84.0641, "step": 97980 }, { "epoch": 0.8106051205691359, "grad_norm": 1476.5244140625, "learning_rate": 1.2628829017341594e-06, "loss": 133.0605, "step": 97990 }, { "epoch": 0.8106878438185052, "grad_norm": 747.8192749023438, "learning_rate": 1.2619460759108521e-06, "loss": 98.8314, "step": 98000 }, { "epoch": 0.8107705670678744, "grad_norm": 760.4857177734375, "learning_rate": 1.2610095475073415e-06, "loss": 64.8025, "step": 98010 }, { "epoch": 0.8108532903172436, "grad_norm": 1347.1624755859375, "learning_rate": 1.26007331659814e-06, "loss": 89.3215, "step": 98020 }, { "epoch": 0.8109360135666129, "grad_norm": 778.6011352539062, "learning_rate": 1.2591373832577408e-06, "loss": 83.764, "step": 98030 }, { "epoch": 0.8110187368159821, "grad_norm": 825.260498046875, "learning_rate": 1.2582017475606117e-06, "loss": 106.7728, "step": 98040 }, { "epoch": 0.8111014600653513, "grad_norm": 1110.2823486328125, "learning_rate": 1.2572664095811976e-06, "loss": 93.7398, "step": 98050 }, { "epoch": 0.8111841833147206, "grad_norm": 764.0325317382812, "learning_rate": 1.2563313693939177e-06, "loss": 89.7206, "step": 98060 }, { "epoch": 0.8112669065640898, "grad_norm": 744.2551879882812, "learning_rate": 1.25539662707317e-06, "loss": 72.9195, "step": 98070 }, { "epoch": 0.811349629813459, "grad_norm": 857.3899536132812, "learning_rate": 1.2544621826933257e-06, "loss": 84.4764, "step": 98080 }, { "epoch": 0.8114323530628283, "grad_norm": 1150.891845703125, "learning_rate": 1.2535280363287388e-06, "loss": 73.521, "step": 98090 }, { "epoch": 0.8115150763121975, "grad_norm": 585.0152587890625, "learning_rate": 1.2525941880537307e-06, "loss": 74.5685, "step": 98100 }, { "epoch": 0.8115977995615667, "grad_norm": 927.9107666015625, "learning_rate": 1.2516606379426044e-06, "loss": 58.3165, "step": 98110 }, { "epoch": 0.8116805228109361, "grad_norm": 997.0220336914062, "learning_rate": 1.250727386069639e-06, "loss": 102.8923, "step": 98120 }, { "epoch": 0.8117632460603053, "grad_norm": 918.0977783203125, "learning_rate": 1.2497944325090882e-06, "loss": 92.9588, "step": 98130 }, { "epoch": 0.8118459693096745, "grad_norm": 437.514404296875, "learning_rate": 1.248861777335184e-06, "loss": 74.5204, "step": 98140 }, { "epoch": 0.8119286925590438, "grad_norm": 873.174072265625, "learning_rate": 1.247929420622132e-06, "loss": 76.6309, "step": 98150 }, { "epoch": 0.812011415808413, "grad_norm": 1936.2078857421875, "learning_rate": 1.2469973624441168e-06, "loss": 133.0517, "step": 98160 }, { "epoch": 0.8120941390577822, "grad_norm": 568.5994262695312, "learning_rate": 1.2460656028752976e-06, "loss": 93.9224, "step": 98170 }, { "epoch": 0.8121768623071515, "grad_norm": 882.7687377929688, "learning_rate": 1.24513414198981e-06, "loss": 120.2803, "step": 98180 }, { "epoch": 0.8122595855565207, "grad_norm": 753.4219970703125, "learning_rate": 1.244202979861766e-06, "loss": 78.88, "step": 98190 }, { "epoch": 0.8123423088058899, "grad_norm": 552.3338012695312, "learning_rate": 1.2432721165652544e-06, "loss": 56.6679, "step": 98200 }, { "epoch": 0.8124250320552592, "grad_norm": 948.5594482421875, "learning_rate": 1.2423415521743392e-06, "loss": 77.7815, "step": 98210 }, { "epoch": 0.8125077553046284, "grad_norm": 668.4248657226562, "learning_rate": 1.2414112867630619e-06, "loss": 82.7063, "step": 98220 }, { "epoch": 0.8125904785539976, "grad_norm": 507.14691162109375, "learning_rate": 1.2404813204054383e-06, "loss": 77.7765, "step": 98230 }, { "epoch": 0.8126732018033669, "grad_norm": 1215.5714111328125, "learning_rate": 1.239551653175462e-06, "loss": 132.3814, "step": 98240 }, { "epoch": 0.8127559250527361, "grad_norm": 711.6063232421875, "learning_rate": 1.238622285147103e-06, "loss": 81.4849, "step": 98250 }, { "epoch": 0.8128386483021053, "grad_norm": 1013.1691284179688, "learning_rate": 1.237693216394306e-06, "loss": 117.3906, "step": 98260 }, { "epoch": 0.8129213715514746, "grad_norm": 735.0127563476562, "learning_rate": 1.236764446990994e-06, "loss": 75.7862, "step": 98270 }, { "epoch": 0.8130040948008438, "grad_norm": 728.4838256835938, "learning_rate": 1.2358359770110634e-06, "loss": 64.463, "step": 98280 }, { "epoch": 0.813086818050213, "grad_norm": 746.21044921875, "learning_rate": 1.2349078065283886e-06, "loss": 84.8162, "step": 98290 }, { "epoch": 0.8131695412995823, "grad_norm": 745.414306640625, "learning_rate": 1.2339799356168207e-06, "loss": 91.913, "step": 98300 }, { "epoch": 0.8132522645489515, "grad_norm": 788.8545532226562, "learning_rate": 1.2330523643501858e-06, "loss": 73.4357, "step": 98310 }, { "epoch": 0.8133349877983207, "grad_norm": 1031.7391357421875, "learning_rate": 1.2321250928022855e-06, "loss": 94.2501, "step": 98320 }, { "epoch": 0.81341771104769, "grad_norm": 723.92919921875, "learning_rate": 1.2311981210468998e-06, "loss": 80.1034, "step": 98330 }, { "epoch": 0.8135004342970592, "grad_norm": 1015.01220703125, "learning_rate": 1.2302714491577834e-06, "loss": 126.1582, "step": 98340 }, { "epoch": 0.8135831575464284, "grad_norm": 718.5742797851562, "learning_rate": 1.2293450772086667e-06, "loss": 60.2428, "step": 98350 }, { "epoch": 0.8136658807957977, "grad_norm": 796.4895629882812, "learning_rate": 1.2284190052732575e-06, "loss": 75.6503, "step": 98360 }, { "epoch": 0.8137486040451669, "grad_norm": 479.49462890625, "learning_rate": 1.2274932334252387e-06, "loss": 111.6113, "step": 98370 }, { "epoch": 0.8138313272945361, "grad_norm": 723.4696655273438, "learning_rate": 1.2265677617382698e-06, "loss": 76.3011, "step": 98380 }, { "epoch": 0.8139140505439053, "grad_norm": 791.28173828125, "learning_rate": 1.2256425902859864e-06, "loss": 78.6211, "step": 98390 }, { "epoch": 0.8139967737932746, "grad_norm": 1258.62548828125, "learning_rate": 1.224717719142e-06, "loss": 89.4393, "step": 98400 }, { "epoch": 0.8140794970426438, "grad_norm": 820.9175415039062, "learning_rate": 1.2237931483798993e-06, "loss": 107.2661, "step": 98410 }, { "epoch": 0.814162220292013, "grad_norm": 1227.457275390625, "learning_rate": 1.2228688780732463e-06, "loss": 71.8221, "step": 98420 }, { "epoch": 0.8142449435413823, "grad_norm": 548.6934204101562, "learning_rate": 1.2219449082955825e-06, "loss": 115.0666, "step": 98430 }, { "epoch": 0.8143276667907515, "grad_norm": 997.916259765625, "learning_rate": 1.2210212391204234e-06, "loss": 82.2138, "step": 98440 }, { "epoch": 0.8144103900401207, "grad_norm": 1067.23486328125, "learning_rate": 1.2200978706212606e-06, "loss": 70.915, "step": 98450 }, { "epoch": 0.81449311328949, "grad_norm": 1409.0748291015625, "learning_rate": 1.2191748028715632e-06, "loss": 64.1739, "step": 98460 }, { "epoch": 0.8145758365388592, "grad_norm": 1686.0634765625, "learning_rate": 1.2182520359447753e-06, "loss": 88.7429, "step": 98470 }, { "epoch": 0.8146585597882284, "grad_norm": 990.0397338867188, "learning_rate": 1.2173295699143172e-06, "loss": 106.0889, "step": 98480 }, { "epoch": 0.8147412830375977, "grad_norm": 991.7344360351562, "learning_rate": 1.2164074048535846e-06, "loss": 95.5769, "step": 98490 }, { "epoch": 0.8148240062869669, "grad_norm": 1526.431396484375, "learning_rate": 1.2154855408359507e-06, "loss": 99.9895, "step": 98500 }, { "epoch": 0.8149067295363361, "grad_norm": 802.585205078125, "learning_rate": 1.2145639779347634e-06, "loss": 83.8027, "step": 98510 }, { "epoch": 0.8149894527857054, "grad_norm": 806.9586181640625, "learning_rate": 1.2136427162233493e-06, "loss": 82.421, "step": 98520 }, { "epoch": 0.8150721760350746, "grad_norm": 810.6295166015625, "learning_rate": 1.212721755775006e-06, "loss": 98.6951, "step": 98530 }, { "epoch": 0.8151548992844438, "grad_norm": 912.5408325195312, "learning_rate": 1.2118010966630095e-06, "loss": 83.3605, "step": 98540 }, { "epoch": 0.8152376225338132, "grad_norm": 1080.7371826171875, "learning_rate": 1.210880738960616e-06, "loss": 84.3936, "step": 98550 }, { "epoch": 0.8153203457831824, "grad_norm": 572.9903564453125, "learning_rate": 1.2099606827410521e-06, "loss": 64.3546, "step": 98560 }, { "epoch": 0.8154030690325516, "grad_norm": 914.2869873046875, "learning_rate": 1.2090409280775228e-06, "loss": 94.1465, "step": 98570 }, { "epoch": 0.8154857922819209, "grad_norm": 880.4005126953125, "learning_rate": 1.208121475043209e-06, "loss": 79.7238, "step": 98580 }, { "epoch": 0.8155685155312901, "grad_norm": 1575.5872802734375, "learning_rate": 1.2072023237112668e-06, "loss": 102.0344, "step": 98590 }, { "epoch": 0.8156512387806593, "grad_norm": 1086.993408203125, "learning_rate": 1.2062834741548291e-06, "loss": 90.5145, "step": 98600 }, { "epoch": 0.8157339620300286, "grad_norm": 759.2196655273438, "learning_rate": 1.2053649264470064e-06, "loss": 93.4888, "step": 98610 }, { "epoch": 0.8158166852793978, "grad_norm": 939.29736328125, "learning_rate": 1.2044466806608794e-06, "loss": 66.8826, "step": 98620 }, { "epoch": 0.815899408528767, "grad_norm": 691.4633178710938, "learning_rate": 1.203528736869511e-06, "loss": 75.7453, "step": 98630 }, { "epoch": 0.8159821317781363, "grad_norm": 843.3115234375, "learning_rate": 1.2026110951459364e-06, "loss": 104.9937, "step": 98640 }, { "epoch": 0.8160648550275055, "grad_norm": 707.0924682617188, "learning_rate": 1.2016937555631702e-06, "loss": 66.0707, "step": 98650 }, { "epoch": 0.8161475782768747, "grad_norm": 1021.8479614257812, "learning_rate": 1.2007767181942003e-06, "loss": 99.7831, "step": 98660 }, { "epoch": 0.816230301526244, "grad_norm": 850.4284057617188, "learning_rate": 1.1998599831119912e-06, "loss": 75.5687, "step": 98670 }, { "epoch": 0.8163130247756132, "grad_norm": 1095.51318359375, "learning_rate": 1.1989435503894826e-06, "loss": 101.9039, "step": 98680 }, { "epoch": 0.8163957480249824, "grad_norm": 1368.3948974609375, "learning_rate": 1.1980274200995928e-06, "loss": 91.4454, "step": 98690 }, { "epoch": 0.8164784712743517, "grad_norm": 969.6019287109375, "learning_rate": 1.197111592315211e-06, "loss": 84.7942, "step": 98700 }, { "epoch": 0.8165611945237209, "grad_norm": 531.8544311523438, "learning_rate": 1.1961960671092066e-06, "loss": 84.7319, "step": 98710 }, { "epoch": 0.8166439177730901, "grad_norm": 626.1632080078125, "learning_rate": 1.1952808445544246e-06, "loss": 70.9973, "step": 98720 }, { "epoch": 0.8167266410224594, "grad_norm": 718.9005737304688, "learning_rate": 1.1943659247236838e-06, "loss": 81.7739, "step": 98730 }, { "epoch": 0.8168093642718286, "grad_norm": 826.1453247070312, "learning_rate": 1.1934513076897798e-06, "loss": 77.8973, "step": 98740 }, { "epoch": 0.8168920875211978, "grad_norm": 670.5861206054688, "learning_rate": 1.1925369935254872e-06, "loss": 70.1664, "step": 98750 }, { "epoch": 0.8169748107705671, "grad_norm": 1031.8236083984375, "learning_rate": 1.191622982303552e-06, "loss": 96.2467, "step": 98760 }, { "epoch": 0.8170575340199363, "grad_norm": 862.9549560546875, "learning_rate": 1.1907092740966976e-06, "loss": 95.5136, "step": 98770 }, { "epoch": 0.8171402572693055, "grad_norm": 1296.6314697265625, "learning_rate": 1.1897958689776256e-06, "loss": 125.511, "step": 98780 }, { "epoch": 0.8172229805186748, "grad_norm": 840.3079833984375, "learning_rate": 1.1888827670190085e-06, "loss": 103.1509, "step": 98790 }, { "epoch": 0.817305703768044, "grad_norm": 822.6774291992188, "learning_rate": 1.1879699682934993e-06, "loss": 89.4122, "step": 98800 }, { "epoch": 0.8173884270174132, "grad_norm": 5291.2255859375, "learning_rate": 1.1870574728737244e-06, "loss": 141.3245, "step": 98810 }, { "epoch": 0.8174711502667825, "grad_norm": 973.1185302734375, "learning_rate": 1.1861452808322877e-06, "loss": 87.4708, "step": 98820 }, { "epoch": 0.8175538735161517, "grad_norm": 890.1282348632812, "learning_rate": 1.1852333922417658e-06, "loss": 85.0632, "step": 98830 }, { "epoch": 0.8176365967655209, "grad_norm": 1301.4791259765625, "learning_rate": 1.1843218071747171e-06, "loss": 96.202, "step": 98840 }, { "epoch": 0.8177193200148902, "grad_norm": 659.366455078125, "learning_rate": 1.183410525703671e-06, "loss": 86.4163, "step": 98850 }, { "epoch": 0.8178020432642594, "grad_norm": 824.1502685546875, "learning_rate": 1.182499547901133e-06, "loss": 80.8635, "step": 98860 }, { "epoch": 0.8178847665136286, "grad_norm": 1438.4332275390625, "learning_rate": 1.1815888738395882e-06, "loss": 104.6398, "step": 98870 }, { "epoch": 0.8179674897629979, "grad_norm": 850.4103393554688, "learning_rate": 1.1806785035914903e-06, "loss": 112.7476, "step": 98880 }, { "epoch": 0.8180502130123671, "grad_norm": 1077.782470703125, "learning_rate": 1.1797684372292762e-06, "loss": 61.9891, "step": 98890 }, { "epoch": 0.8181329362617363, "grad_norm": 528.4021606445312, "learning_rate": 1.1788586748253545e-06, "loss": 92.2062, "step": 98900 }, { "epoch": 0.8182156595111056, "grad_norm": 739.4981079101562, "learning_rate": 1.1779492164521117e-06, "loss": 67.1746, "step": 98910 }, { "epoch": 0.8182983827604748, "grad_norm": 935.0463256835938, "learning_rate": 1.177040062181909e-06, "loss": 65.3778, "step": 98920 }, { "epoch": 0.818381106009844, "grad_norm": 1494.3590087890625, "learning_rate": 1.1761312120870822e-06, "loss": 85.324, "step": 98930 }, { "epoch": 0.8184638292592133, "grad_norm": 514.237060546875, "learning_rate": 1.175222666239947e-06, "loss": 91.5604, "step": 98940 }, { "epoch": 0.8185465525085825, "grad_norm": 856.9160766601562, "learning_rate": 1.174314424712792e-06, "loss": 86.7412, "step": 98950 }, { "epoch": 0.8186292757579517, "grad_norm": 849.8342895507812, "learning_rate": 1.1734064875778795e-06, "loss": 81.7563, "step": 98960 }, { "epoch": 0.8187119990073211, "grad_norm": 648.2426147460938, "learning_rate": 1.1724988549074506e-06, "loss": 84.3063, "step": 98970 }, { "epoch": 0.8187947222566903, "grad_norm": 736.9127197265625, "learning_rate": 1.1715915267737228e-06, "loss": 123.6431, "step": 98980 }, { "epoch": 0.8188774455060595, "grad_norm": 2442.851318359375, "learning_rate": 1.1706845032488867e-06, "loss": 78.3283, "step": 98990 }, { "epoch": 0.8189601687554288, "grad_norm": 895.6693115234375, "learning_rate": 1.1697777844051105e-06, "loss": 67.0833, "step": 99000 }, { "epoch": 0.819042892004798, "grad_norm": 616.1843872070312, "learning_rate": 1.1688713703145377e-06, "loss": 97.3783, "step": 99010 }, { "epoch": 0.8191256152541672, "grad_norm": 753.047119140625, "learning_rate": 1.1679652610492875e-06, "loss": 78.6516, "step": 99020 }, { "epoch": 0.8192083385035365, "grad_norm": 1777.462890625, "learning_rate": 1.1670594566814536e-06, "loss": 78.5895, "step": 99030 }, { "epoch": 0.8192910617529057, "grad_norm": 898.646484375, "learning_rate": 1.1661539572831105e-06, "loss": 67.8556, "step": 99040 }, { "epoch": 0.8193737850022749, "grad_norm": 1100.24267578125, "learning_rate": 1.1652487629263003e-06, "loss": 92.8213, "step": 99050 }, { "epoch": 0.8194565082516442, "grad_norm": 727.767822265625, "learning_rate": 1.1643438736830476e-06, "loss": 87.5095, "step": 99060 }, { "epoch": 0.8195392315010134, "grad_norm": 556.3981323242188, "learning_rate": 1.1634392896253495e-06, "loss": 98.1264, "step": 99070 }, { "epoch": 0.8196219547503826, "grad_norm": 761.73828125, "learning_rate": 1.1625350108251793e-06, "loss": 85.1379, "step": 99080 }, { "epoch": 0.8197046779997519, "grad_norm": 553.5272827148438, "learning_rate": 1.1616310373544865e-06, "loss": 72.9364, "step": 99090 }, { "epoch": 0.8197874012491211, "grad_norm": 601.8910522460938, "learning_rate": 1.1607273692851967e-06, "loss": 87.1516, "step": 99100 }, { "epoch": 0.8198701244984903, "grad_norm": 369.5881652832031, "learning_rate": 1.1598240066892103e-06, "loss": 66.5272, "step": 99110 }, { "epoch": 0.8199528477478595, "grad_norm": 523.6664428710938, "learning_rate": 1.1589209496384035e-06, "loss": 84.0007, "step": 99120 }, { "epoch": 0.8200355709972288, "grad_norm": 1775.257080078125, "learning_rate": 1.1580181982046285e-06, "loss": 100.826, "step": 99130 }, { "epoch": 0.820118294246598, "grad_norm": 1454.958984375, "learning_rate": 1.1571157524597137e-06, "loss": 103.4577, "step": 99140 }, { "epoch": 0.8202010174959672, "grad_norm": 673.6801147460938, "learning_rate": 1.1562136124754613e-06, "loss": 89.0967, "step": 99150 }, { "epoch": 0.8202837407453365, "grad_norm": 514.0848388671875, "learning_rate": 1.1553117783236516e-06, "loss": 67.5358, "step": 99160 }, { "epoch": 0.8203664639947057, "grad_norm": 888.429443359375, "learning_rate": 1.154410250076039e-06, "loss": 101.8155, "step": 99170 }, { "epoch": 0.8204491872440749, "grad_norm": 515.0179443359375, "learning_rate": 1.1535090278043538e-06, "loss": 92.6118, "step": 99180 }, { "epoch": 0.8205319104934442, "grad_norm": 659.7156372070312, "learning_rate": 1.1526081115803017e-06, "loss": 77.2248, "step": 99190 }, { "epoch": 0.8206146337428134, "grad_norm": 680.0890502929688, "learning_rate": 1.1517075014755657e-06, "loss": 107.0121, "step": 99200 }, { "epoch": 0.8206973569921826, "grad_norm": 1098.1448974609375, "learning_rate": 1.1508071975618025e-06, "loss": 96.3001, "step": 99210 }, { "epoch": 0.8207800802415519, "grad_norm": 628.8194580078125, "learning_rate": 1.1499071999106449e-06, "loss": 94.3524, "step": 99220 }, { "epoch": 0.8208628034909211, "grad_norm": 908.0435791015625, "learning_rate": 1.149007508593702e-06, "loss": 89.7701, "step": 99230 }, { "epoch": 0.8209455267402903, "grad_norm": 635.8421630859375, "learning_rate": 1.148108123682558e-06, "loss": 69.7421, "step": 99240 }, { "epoch": 0.8210282499896596, "grad_norm": 540.8094482421875, "learning_rate": 1.1472090452487728e-06, "loss": 83.0186, "step": 99250 }, { "epoch": 0.8211109732390288, "grad_norm": 655.1044311523438, "learning_rate": 1.146310273363882e-06, "loss": 79.3086, "step": 99260 }, { "epoch": 0.821193696488398, "grad_norm": 579.1731567382812, "learning_rate": 1.1454118080993965e-06, "loss": 86.8246, "step": 99270 }, { "epoch": 0.8212764197377673, "grad_norm": 1405.385498046875, "learning_rate": 1.1445136495268033e-06, "loss": 124.9013, "step": 99280 }, { "epoch": 0.8213591429871365, "grad_norm": 553.7492065429688, "learning_rate": 1.143615797717565e-06, "loss": 80.3261, "step": 99290 }, { "epoch": 0.8214418662365057, "grad_norm": 2029.3385009765625, "learning_rate": 1.1427182527431192e-06, "loss": 95.4808, "step": 99300 }, { "epoch": 0.821524589485875, "grad_norm": 756.5120849609375, "learning_rate": 1.1418210146748792e-06, "loss": 67.8008, "step": 99310 }, { "epoch": 0.8216073127352442, "grad_norm": 396.6879577636719, "learning_rate": 1.1409240835842344e-06, "loss": 70.0907, "step": 99320 }, { "epoch": 0.8216900359846134, "grad_norm": 836.3045654296875, "learning_rate": 1.1400274595425499e-06, "loss": 81.1893, "step": 99330 }, { "epoch": 0.8217727592339827, "grad_norm": 831.0084838867188, "learning_rate": 1.139131142621165e-06, "loss": 108.7785, "step": 99340 }, { "epoch": 0.8218554824833519, "grad_norm": 594.3251953125, "learning_rate": 1.1382351328913964e-06, "loss": 90.5988, "step": 99350 }, { "epoch": 0.8219382057327211, "grad_norm": 997.0202026367188, "learning_rate": 1.137339430424535e-06, "loss": 98.3652, "step": 99360 }, { "epoch": 0.8220209289820904, "grad_norm": 1159.4931640625, "learning_rate": 1.1364440352918482e-06, "loss": 93.9449, "step": 99370 }, { "epoch": 0.8221036522314596, "grad_norm": 1038.8775634765625, "learning_rate": 1.1355489475645798e-06, "loss": 109.6431, "step": 99380 }, { "epoch": 0.8221863754808288, "grad_norm": 1000.264404296875, "learning_rate": 1.1346541673139428e-06, "loss": 81.506, "step": 99390 }, { "epoch": 0.8222690987301982, "grad_norm": 986.3641967773438, "learning_rate": 1.1337596946111357e-06, "loss": 91.9383, "step": 99400 }, { "epoch": 0.8223518219795674, "grad_norm": 548.0112915039062, "learning_rate": 1.132865529527326e-06, "loss": 79.2564, "step": 99410 }, { "epoch": 0.8224345452289366, "grad_norm": 912.9034423828125, "learning_rate": 1.1319716721336587e-06, "loss": 88.9765, "step": 99420 }, { "epoch": 0.8225172684783059, "grad_norm": 463.36883544921875, "learning_rate": 1.1310781225012535e-06, "loss": 58.9931, "step": 99430 }, { "epoch": 0.8225999917276751, "grad_norm": 1589.856201171875, "learning_rate": 1.130184880701206e-06, "loss": 102.2906, "step": 99440 }, { "epoch": 0.8226827149770443, "grad_norm": 653.0153198242188, "learning_rate": 1.1292919468045876e-06, "loss": 83.1269, "step": 99450 }, { "epoch": 0.8227654382264136, "grad_norm": 1476.3031005859375, "learning_rate": 1.128399320882445e-06, "loss": 81.7267, "step": 99460 }, { "epoch": 0.8228481614757828, "grad_norm": 633.2077026367188, "learning_rate": 1.1275070030058016e-06, "loss": 74.4383, "step": 99470 }, { "epoch": 0.822930884725152, "grad_norm": 1069.526123046875, "learning_rate": 1.1266149932456516e-06, "loss": 93.1175, "step": 99480 }, { "epoch": 0.8230136079745213, "grad_norm": 513.8902587890625, "learning_rate": 1.1257232916729693e-06, "loss": 78.2727, "step": 99490 }, { "epoch": 0.8230963312238905, "grad_norm": 552.6843872070312, "learning_rate": 1.1248318983587052e-06, "loss": 77.5065, "step": 99500 }, { "epoch": 0.8231790544732597, "grad_norm": 878.9365234375, "learning_rate": 1.1239408133737828e-06, "loss": 77.3874, "step": 99510 }, { "epoch": 0.823261777722629, "grad_norm": 1088.0330810546875, "learning_rate": 1.123050036789101e-06, "loss": 93.4705, "step": 99520 }, { "epoch": 0.8233445009719982, "grad_norm": 916.3295288085938, "learning_rate": 1.122159568675535e-06, "loss": 86.091, "step": 99530 }, { "epoch": 0.8234272242213674, "grad_norm": 595.7514038085938, "learning_rate": 1.121269409103935e-06, "loss": 110.6302, "step": 99540 }, { "epoch": 0.8235099474707367, "grad_norm": 819.598388671875, "learning_rate": 1.1203795581451288e-06, "loss": 116.6621, "step": 99550 }, { "epoch": 0.8235926707201059, "grad_norm": 669.3980712890625, "learning_rate": 1.1194900158699146e-06, "loss": 83.4266, "step": 99560 }, { "epoch": 0.8236753939694751, "grad_norm": 630.958251953125, "learning_rate": 1.1186007823490708e-06, "loss": 86.0332, "step": 99570 }, { "epoch": 0.8237581172188444, "grad_norm": 1063.6295166015625, "learning_rate": 1.1177118576533492e-06, "loss": 99.3058, "step": 99580 }, { "epoch": 0.8238408404682136, "grad_norm": 1337.9732666015625, "learning_rate": 1.116823241853477e-06, "loss": 54.1832, "step": 99590 }, { "epoch": 0.8239235637175828, "grad_norm": 680.459228515625, "learning_rate": 1.1159349350201587e-06, "loss": 86.0068, "step": 99600 }, { "epoch": 0.8240062869669521, "grad_norm": 976.7091674804688, "learning_rate": 1.1150469372240724e-06, "loss": 85.8299, "step": 99610 }, { "epoch": 0.8240890102163213, "grad_norm": 839.8695678710938, "learning_rate": 1.114159248535872e-06, "loss": 85.7322, "step": 99620 }, { "epoch": 0.8241717334656905, "grad_norm": 524.162841796875, "learning_rate": 1.1132718690261868e-06, "loss": 87.2623, "step": 99630 }, { "epoch": 0.8242544567150598, "grad_norm": 808.6865234375, "learning_rate": 1.1123847987656221e-06, "loss": 89.9084, "step": 99640 }, { "epoch": 0.824337179964429, "grad_norm": 1346.7198486328125, "learning_rate": 1.1114980378247565e-06, "loss": 73.3104, "step": 99650 }, { "epoch": 0.8244199032137982, "grad_norm": 860.0714111328125, "learning_rate": 1.1106115862741457e-06, "loss": 136.1401, "step": 99660 }, { "epoch": 0.8245026264631675, "grad_norm": 874.57666015625, "learning_rate": 1.1097254441843215e-06, "loss": 112.3625, "step": 99670 }, { "epoch": 0.8245853497125367, "grad_norm": 788.482666015625, "learning_rate": 1.1088396116257893e-06, "loss": 54.8236, "step": 99680 }, { "epoch": 0.8246680729619059, "grad_norm": 807.5397338867188, "learning_rate": 1.10795408866903e-06, "loss": 79.1535, "step": 99690 }, { "epoch": 0.8247507962112752, "grad_norm": 1106.181396484375, "learning_rate": 1.1070688753845032e-06, "loss": 117.5441, "step": 99700 }, { "epoch": 0.8248335194606444, "grad_norm": 782.138427734375, "learning_rate": 1.1061839718426399e-06, "loss": 92.2893, "step": 99710 }, { "epoch": 0.8249162427100136, "grad_norm": 1031.1993408203125, "learning_rate": 1.1052993781138477e-06, "loss": 80.9152, "step": 99720 }, { "epoch": 0.8249989659593829, "grad_norm": 443.52801513671875, "learning_rate": 1.1044150942685112e-06, "loss": 104.3299, "step": 99730 }, { "epoch": 0.8250816892087521, "grad_norm": 1952.7318115234375, "learning_rate": 1.1035311203769855e-06, "loss": 135.8231, "step": 99740 }, { "epoch": 0.8251644124581213, "grad_norm": 863.5310668945312, "learning_rate": 1.1026474565096068e-06, "loss": 85.8996, "step": 99750 }, { "epoch": 0.8252471357074906, "grad_norm": 756.4827880859375, "learning_rate": 1.1017641027366832e-06, "loss": 97.5026, "step": 99760 }, { "epoch": 0.8253298589568598, "grad_norm": 881.189453125, "learning_rate": 1.1008810591284997e-06, "loss": 100.8524, "step": 99770 }, { "epoch": 0.825412582206229, "grad_norm": 574.4223022460938, "learning_rate": 1.0999983257553137e-06, "loss": 82.259, "step": 99780 }, { "epoch": 0.8254953054555983, "grad_norm": 1166.70166015625, "learning_rate": 1.0991159026873643e-06, "loss": 79.2827, "step": 99790 }, { "epoch": 0.8255780287049675, "grad_norm": 712.7079467773438, "learning_rate": 1.0982337899948603e-06, "loss": 71.3755, "step": 99800 }, { "epoch": 0.8256607519543367, "grad_norm": 921.7284545898438, "learning_rate": 1.0973519877479878e-06, "loss": 50.2585, "step": 99810 }, { "epoch": 0.8257434752037061, "grad_norm": 480.2099914550781, "learning_rate": 1.0964704960169054e-06, "loss": 75.6909, "step": 99820 }, { "epoch": 0.8258261984530753, "grad_norm": 659.5361328125, "learning_rate": 1.0955893148717512e-06, "loss": 92.5624, "step": 99830 }, { "epoch": 0.8259089217024445, "grad_norm": 386.55810546875, "learning_rate": 1.0947084443826361e-06, "loss": 71.9917, "step": 99840 }, { "epoch": 0.8259916449518137, "grad_norm": 968.19140625, "learning_rate": 1.0938278846196471e-06, "loss": 75.8837, "step": 99850 }, { "epoch": 0.826074368201183, "grad_norm": 1425.4276123046875, "learning_rate": 1.0929476356528469e-06, "loss": 70.3875, "step": 99860 }, { "epoch": 0.8261570914505522, "grad_norm": 895.6668090820312, "learning_rate": 1.092067697552272e-06, "loss": 91.4471, "step": 99870 }, { "epoch": 0.8262398146999214, "grad_norm": 1033.364013671875, "learning_rate": 1.091188070387934e-06, "loss": 90.5695, "step": 99880 }, { "epoch": 0.8263225379492907, "grad_norm": 660.748046875, "learning_rate": 1.0903087542298241e-06, "loss": 96.0515, "step": 99890 }, { "epoch": 0.8264052611986599, "grad_norm": 480.1137390136719, "learning_rate": 1.0894297491479044e-06, "loss": 80.2592, "step": 99900 }, { "epoch": 0.8264879844480291, "grad_norm": 643.3427124023438, "learning_rate": 1.0885510552121115e-06, "loss": 59.6039, "step": 99910 }, { "epoch": 0.8265707076973984, "grad_norm": 1013.935546875, "learning_rate": 1.0876726724923597e-06, "loss": 77.1848, "step": 99920 }, { "epoch": 0.8266534309467676, "grad_norm": 841.8418579101562, "learning_rate": 1.086794601058538e-06, "loss": 83.5782, "step": 99930 }, { "epoch": 0.8267361541961368, "grad_norm": 337.9281311035156, "learning_rate": 1.0859168409805109e-06, "loss": 68.1758, "step": 99940 }, { "epoch": 0.8268188774455061, "grad_norm": 665.4752807617188, "learning_rate": 1.0850393923281176e-06, "loss": 100.0561, "step": 99950 }, { "epoch": 0.8269016006948753, "grad_norm": 569.7544555664062, "learning_rate": 1.0841622551711728e-06, "loss": 83.8357, "step": 99960 }, { "epoch": 0.8269843239442445, "grad_norm": 661.4277954101562, "learning_rate": 1.0832854295794659e-06, "loss": 65.98, "step": 99970 }, { "epoch": 0.8270670471936138, "grad_norm": 930.4299926757812, "learning_rate": 1.0824089156227624e-06, "loss": 77.1078, "step": 99980 }, { "epoch": 0.827149770442983, "grad_norm": 1039.9451904296875, "learning_rate": 1.0815327133708015e-06, "loss": 49.8599, "step": 99990 }, { "epoch": 0.8272324936923522, "grad_norm": 907.8119506835938, "learning_rate": 1.0806568228932995e-06, "loss": 87.3194, "step": 100000 }, { "epoch": 0.8273152169417215, "grad_norm": 665.1284790039062, "learning_rate": 1.079781244259947e-06, "loss": 113.8395, "step": 100010 }, { "epoch": 0.8273979401910907, "grad_norm": 1413.6357421875, "learning_rate": 1.0789059775404093e-06, "loss": 98.0291, "step": 100020 }, { "epoch": 0.8274806634404599, "grad_norm": 738.0913696289062, "learning_rate": 1.0780310228043278e-06, "loss": 97.2634, "step": 100030 }, { "epoch": 0.8275633866898292, "grad_norm": 610.05859375, "learning_rate": 1.0771563801213186e-06, "loss": 86.15, "step": 100040 }, { "epoch": 0.8276461099391984, "grad_norm": 723.9077758789062, "learning_rate": 1.076282049560972e-06, "loss": 92.4268, "step": 100050 }, { "epoch": 0.8277288331885676, "grad_norm": 939.378662109375, "learning_rate": 1.075408031192856e-06, "loss": 82.4279, "step": 100060 }, { "epoch": 0.8278115564379369, "grad_norm": 1257.1422119140625, "learning_rate": 1.0745343250865114e-06, "loss": 73.5913, "step": 100070 }, { "epoch": 0.8278942796873061, "grad_norm": 568.9317626953125, "learning_rate": 1.073660931311455e-06, "loss": 89.3267, "step": 100080 }, { "epoch": 0.8279770029366753, "grad_norm": 454.4713134765625, "learning_rate": 1.0727878499371786e-06, "loss": 71.951, "step": 100090 }, { "epoch": 0.8280597261860446, "grad_norm": 692.2919311523438, "learning_rate": 1.0719150810331497e-06, "loss": 70.2479, "step": 100100 }, { "epoch": 0.8281424494354138, "grad_norm": 1330.4888916015625, "learning_rate": 1.0710426246688105e-06, "loss": 102.4429, "step": 100110 }, { "epoch": 0.828225172684783, "grad_norm": 739.1103515625, "learning_rate": 1.0701704809135782e-06, "loss": 78.6834, "step": 100120 }, { "epoch": 0.8283078959341523, "grad_norm": 964.680908203125, "learning_rate": 1.0692986498368452e-06, "loss": 103.2081, "step": 100130 }, { "epoch": 0.8283906191835215, "grad_norm": 928.7930908203125, "learning_rate": 1.0684271315079786e-06, "loss": 75.7119, "step": 100140 }, { "epoch": 0.8284733424328907, "grad_norm": 771.4544677734375, "learning_rate": 1.0675559259963226e-06, "loss": 91.3507, "step": 100150 }, { "epoch": 0.82855606568226, "grad_norm": 1169.62109375, "learning_rate": 1.066685033371193e-06, "loss": 69.6342, "step": 100160 }, { "epoch": 0.8286387889316292, "grad_norm": 1027.3876953125, "learning_rate": 1.0658144537018844e-06, "loss": 68.9951, "step": 100170 }, { "epoch": 0.8287215121809984, "grad_norm": 561.0545043945312, "learning_rate": 1.0649441870576644e-06, "loss": 63.7628, "step": 100180 }, { "epoch": 0.8288042354303677, "grad_norm": 2845.06689453125, "learning_rate": 1.0640742335077758e-06, "loss": 74.9551, "step": 100190 }, { "epoch": 0.8288869586797369, "grad_norm": 514.4091186523438, "learning_rate": 1.0632045931214369e-06, "loss": 69.5426, "step": 100200 }, { "epoch": 0.8289696819291061, "grad_norm": 830.9058837890625, "learning_rate": 1.0623352659678415e-06, "loss": 88.3192, "step": 100210 }, { "epoch": 0.8290524051784754, "grad_norm": 681.1116333007812, "learning_rate": 1.0614662521161573e-06, "loss": 73.3991, "step": 100220 }, { "epoch": 0.8291351284278446, "grad_norm": 1170.43212890625, "learning_rate": 1.0605975516355277e-06, "loss": 56.5079, "step": 100230 }, { "epoch": 0.8292178516772138, "grad_norm": 640.3706665039062, "learning_rate": 1.059729164595073e-06, "loss": 76.578, "step": 100240 }, { "epoch": 0.8293005749265832, "grad_norm": 1166.9835205078125, "learning_rate": 1.0588610910638825e-06, "loss": 109.2789, "step": 100250 }, { "epoch": 0.8293832981759524, "grad_norm": 928.90185546875, "learning_rate": 1.057993331111029e-06, "loss": 89.8654, "step": 100260 }, { "epoch": 0.8294660214253216, "grad_norm": 927.5479736328125, "learning_rate": 1.057125884805555e-06, "loss": 82.5449, "step": 100270 }, { "epoch": 0.8295487446746909, "grad_norm": 980.9613037109375, "learning_rate": 1.0562587522164792e-06, "loss": 93.9806, "step": 100280 }, { "epoch": 0.8296314679240601, "grad_norm": 702.5833740234375, "learning_rate": 1.0553919334127943e-06, "loss": 67.3568, "step": 100290 }, { "epoch": 0.8297141911734293, "grad_norm": 1179.7244873046875, "learning_rate": 1.0545254284634703e-06, "loss": 96.2675, "step": 100300 }, { "epoch": 0.8297969144227986, "grad_norm": 885.7655639648438, "learning_rate": 1.053659237437451e-06, "loss": 112.9435, "step": 100310 }, { "epoch": 0.8298796376721678, "grad_norm": 940.1737060546875, "learning_rate": 1.052793360403655e-06, "loss": 80.4012, "step": 100320 }, { "epoch": 0.829962360921537, "grad_norm": 1659.75341796875, "learning_rate": 1.0519277974309771e-06, "loss": 96.4695, "step": 100330 }, { "epoch": 0.8300450841709063, "grad_norm": 809.1747436523438, "learning_rate": 1.0510625485882825e-06, "loss": 90.7687, "step": 100340 }, { "epoch": 0.8301278074202755, "grad_norm": 565.280029296875, "learning_rate": 1.0501976139444191e-06, "loss": 96.0154, "step": 100350 }, { "epoch": 0.8302105306696447, "grad_norm": 1022.1803588867188, "learning_rate": 1.0493329935682045e-06, "loss": 84.8389, "step": 100360 }, { "epoch": 0.830293253919014, "grad_norm": 615.2613525390625, "learning_rate": 1.0484686875284323e-06, "loss": 68.2956, "step": 100370 }, { "epoch": 0.8303759771683832, "grad_norm": 573.2850952148438, "learning_rate": 1.0476046958938719e-06, "loss": 95.2249, "step": 100380 }, { "epoch": 0.8304587004177524, "grad_norm": 504.07470703125, "learning_rate": 1.0467410187332667e-06, "loss": 77.4282, "step": 100390 }, { "epoch": 0.8305414236671217, "grad_norm": 1332.99169921875, "learning_rate": 1.045877656115335e-06, "loss": 117.6646, "step": 100400 }, { "epoch": 0.8306241469164909, "grad_norm": 665.0089721679688, "learning_rate": 1.045014608108773e-06, "loss": 91.1985, "step": 100410 }, { "epoch": 0.8307068701658601, "grad_norm": 1073.398681640625, "learning_rate": 1.0441518747822466e-06, "loss": 96.1906, "step": 100420 }, { "epoch": 0.8307895934152294, "grad_norm": 774.0573120117188, "learning_rate": 1.0432894562044004e-06, "loss": 83.1879, "step": 100430 }, { "epoch": 0.8308723166645986, "grad_norm": 1170.0047607421875, "learning_rate": 1.0424273524438521e-06, "loss": 98.2264, "step": 100440 }, { "epoch": 0.8309550399139678, "grad_norm": 567.7711181640625, "learning_rate": 1.041565563569198e-06, "loss": 72.0893, "step": 100450 }, { "epoch": 0.8310377631633371, "grad_norm": 946.6346435546875, "learning_rate": 1.040704089649005e-06, "loss": 101.0679, "step": 100460 }, { "epoch": 0.8311204864127063, "grad_norm": 609.6173095703125, "learning_rate": 1.039842930751817e-06, "loss": 93.027, "step": 100470 }, { "epoch": 0.8312032096620755, "grad_norm": 693.0861206054688, "learning_rate": 1.0389820869461525e-06, "loss": 75.2829, "step": 100480 }, { "epoch": 0.8312859329114448, "grad_norm": 987.9539184570312, "learning_rate": 1.0381215583005043e-06, "loss": 80.7349, "step": 100490 }, { "epoch": 0.831368656160814, "grad_norm": 645.7682495117188, "learning_rate": 1.0372613448833429e-06, "loss": 103.9739, "step": 100500 }, { "epoch": 0.8314513794101832, "grad_norm": 983.28271484375, "learning_rate": 1.0364014467631078e-06, "loss": 79.8077, "step": 100510 }, { "epoch": 0.8315341026595525, "grad_norm": 754.6640625, "learning_rate": 1.03554186400822e-06, "loss": 88.8747, "step": 100520 }, { "epoch": 0.8316168259089217, "grad_norm": 1055.522705078125, "learning_rate": 1.034682596687071e-06, "loss": 90.0075, "step": 100530 }, { "epoch": 0.8316995491582909, "grad_norm": 1082.3468017578125, "learning_rate": 1.0338236448680283e-06, "loss": 82.0571, "step": 100540 }, { "epoch": 0.8317822724076602, "grad_norm": 1203.2752685546875, "learning_rate": 1.0329650086194371e-06, "loss": 92.2786, "step": 100550 }, { "epoch": 0.8318649956570294, "grad_norm": 513.8129272460938, "learning_rate": 1.0321066880096142e-06, "loss": 100.5733, "step": 100560 }, { "epoch": 0.8319477189063986, "grad_norm": 637.0855712890625, "learning_rate": 1.0312486831068518e-06, "loss": 104.5459, "step": 100570 }, { "epoch": 0.8320304421557678, "grad_norm": 712.1045532226562, "learning_rate": 1.0303909939794176e-06, "loss": 69.6618, "step": 100580 }, { "epoch": 0.8321131654051371, "grad_norm": 686.3661499023438, "learning_rate": 1.0295336206955553e-06, "loss": 88.7248, "step": 100590 }, { "epoch": 0.8321958886545063, "grad_norm": 864.8832397460938, "learning_rate": 1.0286765633234795e-06, "loss": 83.1856, "step": 100600 }, { "epoch": 0.8322786119038755, "grad_norm": 716.6763916015625, "learning_rate": 1.027819821931384e-06, "loss": 115.8508, "step": 100610 }, { "epoch": 0.8323613351532448, "grad_norm": 1016.16748046875, "learning_rate": 1.0269633965874348e-06, "loss": 84.6865, "step": 100620 }, { "epoch": 0.832444058402614, "grad_norm": 910.2161254882812, "learning_rate": 1.0261072873597744e-06, "loss": 85.6898, "step": 100630 }, { "epoch": 0.8325267816519832, "grad_norm": 555.4244384765625, "learning_rate": 1.0252514943165188e-06, "loss": 95.8644, "step": 100640 }, { "epoch": 0.8326095049013525, "grad_norm": 991.6724853515625, "learning_rate": 1.0243960175257605e-06, "loss": 90.1952, "step": 100650 }, { "epoch": 0.8326922281507217, "grad_norm": 1144.724609375, "learning_rate": 1.0235408570555661e-06, "loss": 81.0206, "step": 100660 }, { "epoch": 0.832774951400091, "grad_norm": 1066.9254150390625, "learning_rate": 1.022686012973978e-06, "loss": 65.6117, "step": 100670 }, { "epoch": 0.8328576746494603, "grad_norm": 1321.929931640625, "learning_rate": 1.0218314853490086e-06, "loss": 115.5369, "step": 100680 }, { "epoch": 0.8329403978988295, "grad_norm": 735.5868530273438, "learning_rate": 1.0209772742486501e-06, "loss": 69.8609, "step": 100690 }, { "epoch": 0.8330231211481987, "grad_norm": 1540.8714599609375, "learning_rate": 1.0201233797408694e-06, "loss": 103.727, "step": 100700 }, { "epoch": 0.833105844397568, "grad_norm": 1017.089111328125, "learning_rate": 1.019269801893606e-06, "loss": 84.9051, "step": 100710 }, { "epoch": 0.8331885676469372, "grad_norm": 844.1512451171875, "learning_rate": 1.0184165407747755e-06, "loss": 62.8379, "step": 100720 }, { "epoch": 0.8332712908963064, "grad_norm": 1083.6983642578125, "learning_rate": 1.0175635964522661e-06, "loss": 76.4931, "step": 100730 }, { "epoch": 0.8333540141456757, "grad_norm": 1482.5361328125, "learning_rate": 1.0167109689939459e-06, "loss": 98.549, "step": 100740 }, { "epoch": 0.8334367373950449, "grad_norm": 684.1696166992188, "learning_rate": 1.0158586584676533e-06, "loss": 69.7886, "step": 100750 }, { "epoch": 0.8335194606444141, "grad_norm": 612.1447143554688, "learning_rate": 1.0150066649412038e-06, "loss": 65.4549, "step": 100760 }, { "epoch": 0.8336021838937834, "grad_norm": 578.9083251953125, "learning_rate": 1.0141549884823837e-06, "loss": 81.0457, "step": 100770 }, { "epoch": 0.8336849071431526, "grad_norm": 911.5106201171875, "learning_rate": 1.0133036291589587e-06, "loss": 87.0133, "step": 100780 }, { "epoch": 0.8337676303925218, "grad_norm": 845.751708984375, "learning_rate": 1.0124525870386676e-06, "loss": 75.2599, "step": 100790 }, { "epoch": 0.8338503536418911, "grad_norm": 547.631591796875, "learning_rate": 1.0116018621892237e-06, "loss": 111.8343, "step": 100800 }, { "epoch": 0.8339330768912603, "grad_norm": 1028.4368896484375, "learning_rate": 1.0107514546783154e-06, "loss": 92.0812, "step": 100810 }, { "epoch": 0.8340158001406295, "grad_norm": 653.3988037109375, "learning_rate": 1.0099013645736056e-06, "loss": 84.4328, "step": 100820 }, { "epoch": 0.8340985233899988, "grad_norm": 479.5120544433594, "learning_rate": 1.0090515919427308e-06, "loss": 93.0216, "step": 100830 }, { "epoch": 0.834181246639368, "grad_norm": 596.44677734375, "learning_rate": 1.0082021368533078e-06, "loss": 72.0628, "step": 100840 }, { "epoch": 0.8342639698887372, "grad_norm": 1943.263916015625, "learning_rate": 1.0073529993729191e-06, "loss": 97.5844, "step": 100850 }, { "epoch": 0.8343466931381065, "grad_norm": 601.1087646484375, "learning_rate": 1.0065041795691289e-06, "loss": 85.2571, "step": 100860 }, { "epoch": 0.8344294163874757, "grad_norm": 903.16064453125, "learning_rate": 1.0056556775094734e-06, "loss": 106.5367, "step": 100870 }, { "epoch": 0.8345121396368449, "grad_norm": 942.596923828125, "learning_rate": 1.0048074932614637e-06, "loss": 85.1813, "step": 100880 }, { "epoch": 0.8345948628862142, "grad_norm": 885.8580322265625, "learning_rate": 1.0039596268925867e-06, "loss": 126.8213, "step": 100890 }, { "epoch": 0.8346775861355834, "grad_norm": 819.2152709960938, "learning_rate": 1.0031120784703025e-06, "loss": 100.7738, "step": 100900 }, { "epoch": 0.8347603093849526, "grad_norm": 1307.6453857421875, "learning_rate": 1.0022648480620474e-06, "loss": 91.3189, "step": 100910 }, { "epoch": 0.8348430326343219, "grad_norm": 1590.2379150390625, "learning_rate": 1.001417935735231e-06, "loss": 102.9999, "step": 100920 }, { "epoch": 0.8349257558836911, "grad_norm": 919.4576416015625, "learning_rate": 1.0005713415572383e-06, "loss": 103.8122, "step": 100930 }, { "epoch": 0.8350084791330603, "grad_norm": 835.6314697265625, "learning_rate": 9.99725065595429e-07, "loss": 95.0695, "step": 100940 }, { "epoch": 0.8350912023824296, "grad_norm": 1234.212646484375, "learning_rate": 9.988791079171378e-07, "loss": 119.7262, "step": 100950 }, { "epoch": 0.8351739256317988, "grad_norm": 558.1318969726562, "learning_rate": 9.98033468589673e-07, "loss": 114.4958, "step": 100960 }, { "epoch": 0.835256648881168, "grad_norm": 611.1209106445312, "learning_rate": 9.971881476803185e-07, "loss": 76.4033, "step": 100970 }, { "epoch": 0.8353393721305373, "grad_norm": 1021.4954223632812, "learning_rate": 9.963431452563331e-07, "loss": 86.9654, "step": 100980 }, { "epoch": 0.8354220953799065, "grad_norm": 538.6202392578125, "learning_rate": 9.954984613849488e-07, "loss": 93.7995, "step": 100990 }, { "epoch": 0.8355048186292757, "grad_norm": 643.6083984375, "learning_rate": 9.94654096133374e-07, "loss": 80.0093, "step": 101000 }, { "epoch": 0.835587541878645, "grad_norm": 822.85791015625, "learning_rate": 9.938100495687907e-07, "loss": 74.7564, "step": 101010 }, { "epoch": 0.8356702651280142, "grad_norm": 732.5307006835938, "learning_rate": 9.929663217583562e-07, "loss": 98.5391, "step": 101020 }, { "epoch": 0.8357529883773834, "grad_norm": 762.5067138671875, "learning_rate": 9.92122912769201e-07, "loss": 108.4045, "step": 101030 }, { "epoch": 0.8358357116267527, "grad_norm": 957.1695556640625, "learning_rate": 9.912798226684322e-07, "loss": 86.2965, "step": 101040 }, { "epoch": 0.8359184348761219, "grad_norm": 614.9725952148438, "learning_rate": 9.90437051523131e-07, "loss": 68.6436, "step": 101050 }, { "epoch": 0.8360011581254911, "grad_norm": 594.4864501953125, "learning_rate": 9.895945994003514e-07, "loss": 76.7402, "step": 101060 }, { "epoch": 0.8360838813748604, "grad_norm": 601.7037353515625, "learning_rate": 9.887524663671243e-07, "loss": 92.365, "step": 101070 }, { "epoch": 0.8361666046242296, "grad_norm": 1368.8843994140625, "learning_rate": 9.879106524904547e-07, "loss": 100.576, "step": 101080 }, { "epoch": 0.8362493278735988, "grad_norm": 971.6827392578125, "learning_rate": 9.870691578373216e-07, "loss": 72.7704, "step": 101090 }, { "epoch": 0.8363320511229682, "grad_norm": 824.6112670898438, "learning_rate": 9.862279824746784e-07, "loss": 66.5189, "step": 101100 }, { "epoch": 0.8364147743723374, "grad_norm": 811.2408447265625, "learning_rate": 9.853871264694536e-07, "loss": 101.336, "step": 101110 }, { "epoch": 0.8364974976217066, "grad_norm": 577.3998413085938, "learning_rate": 9.845465898885509e-07, "loss": 91.787, "step": 101120 }, { "epoch": 0.8365802208710759, "grad_norm": 623.9453735351562, "learning_rate": 9.837063727988478e-07, "loss": 64.0953, "step": 101130 }, { "epoch": 0.8366629441204451, "grad_norm": 773.7106323242188, "learning_rate": 9.828664752671963e-07, "loss": 73.4519, "step": 101140 }, { "epoch": 0.8367456673698143, "grad_norm": 1168.3428955078125, "learning_rate": 9.82026897360423e-07, "loss": 84.8574, "step": 101150 }, { "epoch": 0.8368283906191836, "grad_norm": 1095.8922119140625, "learning_rate": 9.811876391453296e-07, "loss": 95.7534, "step": 101160 }, { "epoch": 0.8369111138685528, "grad_norm": 932.7236938476562, "learning_rate": 9.803487006886914e-07, "loss": 102.7611, "step": 101170 }, { "epoch": 0.836993837117922, "grad_norm": 983.7799072265625, "learning_rate": 9.795100820572601e-07, "loss": 72.2526, "step": 101180 }, { "epoch": 0.8370765603672913, "grad_norm": 440.9442138671875, "learning_rate": 9.78671783317761e-07, "loss": 82.0223, "step": 101190 }, { "epoch": 0.8371592836166605, "grad_norm": 806.7423706054688, "learning_rate": 9.778338045368901e-07, "loss": 94.5054, "step": 101200 }, { "epoch": 0.8372420068660297, "grad_norm": 1106.8468017578125, "learning_rate": 9.769961457813254e-07, "loss": 93.8625, "step": 101210 }, { "epoch": 0.837324730115399, "grad_norm": 983.9953002929688, "learning_rate": 9.761588071177141e-07, "loss": 109.6925, "step": 101220 }, { "epoch": 0.8374074533647682, "grad_norm": 850.5468139648438, "learning_rate": 9.753217886126797e-07, "loss": 84.854, "step": 101230 }, { "epoch": 0.8374901766141374, "grad_norm": 1075.742919921875, "learning_rate": 9.7448509033282e-07, "loss": 79.7995, "step": 101240 }, { "epoch": 0.8375728998635067, "grad_norm": 621.546875, "learning_rate": 9.73648712344707e-07, "loss": 81.4679, "step": 101250 }, { "epoch": 0.8376556231128759, "grad_norm": 409.54095458984375, "learning_rate": 9.728126547148875e-07, "loss": 86.3328, "step": 101260 }, { "epoch": 0.8377383463622451, "grad_norm": 1011.2172241210938, "learning_rate": 9.719769175098842e-07, "loss": 70.703, "step": 101270 }, { "epoch": 0.8378210696116144, "grad_norm": 919.0086669921875, "learning_rate": 9.711415007961899e-07, "loss": 94.6781, "step": 101280 }, { "epoch": 0.8379037928609836, "grad_norm": 908.69189453125, "learning_rate": 9.70306404640275e-07, "loss": 133.2232, "step": 101290 }, { "epoch": 0.8379865161103528, "grad_norm": 736.891845703125, "learning_rate": 9.69471629108587e-07, "loss": 87.2299, "step": 101300 }, { "epoch": 0.838069239359722, "grad_norm": 376.5754699707031, "learning_rate": 9.686371742675443e-07, "loss": 78.6543, "step": 101310 }, { "epoch": 0.8381519626090913, "grad_norm": 6189.4169921875, "learning_rate": 9.678030401835399e-07, "loss": 124.2069, "step": 101320 }, { "epoch": 0.8382346858584605, "grad_norm": 1200.981689453125, "learning_rate": 9.66969226922942e-07, "loss": 96.2958, "step": 101330 }, { "epoch": 0.8383174091078297, "grad_norm": 1382.705078125, "learning_rate": 9.66135734552094e-07, "loss": 82.9574, "step": 101340 }, { "epoch": 0.838400132357199, "grad_norm": 671.8123168945312, "learning_rate": 9.653025631373125e-07, "loss": 62.2783, "step": 101350 }, { "epoch": 0.8384828556065682, "grad_norm": 720.500732421875, "learning_rate": 9.644697127448904e-07, "loss": 67.8807, "step": 101360 }, { "epoch": 0.8385655788559374, "grad_norm": 402.5303955078125, "learning_rate": 9.636371834410918e-07, "loss": 76.3787, "step": 101370 }, { "epoch": 0.8386483021053067, "grad_norm": 698.8262329101562, "learning_rate": 9.62804975292158e-07, "loss": 90.8034, "step": 101380 }, { "epoch": 0.8387310253546759, "grad_norm": 715.2416381835938, "learning_rate": 9.619730883643026e-07, "loss": 70.7118, "step": 101390 }, { "epoch": 0.8388137486040451, "grad_norm": 1074.1124267578125, "learning_rate": 9.611415227237181e-07, "loss": 67.4193, "step": 101400 }, { "epoch": 0.8388964718534144, "grad_norm": 1017.0978393554688, "learning_rate": 9.603102784365664e-07, "loss": 95.0126, "step": 101410 }, { "epoch": 0.8389791951027836, "grad_norm": 695.4487915039062, "learning_rate": 9.594793555689868e-07, "loss": 113.1844, "step": 101420 }, { "epoch": 0.8390619183521528, "grad_norm": 556.859130859375, "learning_rate": 9.58648754187091e-07, "loss": 102.4575, "step": 101430 }, { "epoch": 0.8391446416015221, "grad_norm": 1115.348388671875, "learning_rate": 9.578184743569662e-07, "loss": 101.0772, "step": 101440 }, { "epoch": 0.8392273648508913, "grad_norm": 611.0310668945312, "learning_rate": 9.569885161446762e-07, "loss": 75.4577, "step": 101450 }, { "epoch": 0.8393100881002605, "grad_norm": 886.7157592773438, "learning_rate": 9.56158879616253e-07, "loss": 94.1332, "step": 101460 }, { "epoch": 0.8393928113496298, "grad_norm": 683.062744140625, "learning_rate": 9.553295648377097e-07, "loss": 63.7114, "step": 101470 }, { "epoch": 0.839475534598999, "grad_norm": 957.2645874023438, "learning_rate": 9.5450057187503e-07, "loss": 87.1377, "step": 101480 }, { "epoch": 0.8395582578483682, "grad_norm": 845.6005249023438, "learning_rate": 9.536719007941725e-07, "loss": 78.0765, "step": 101490 }, { "epoch": 0.8396409810977375, "grad_norm": 448.1517639160156, "learning_rate": 9.528435516610729e-07, "loss": 96.6448, "step": 101500 }, { "epoch": 0.8397237043471067, "grad_norm": 999.4203491210938, "learning_rate": 9.520155245416379e-07, "loss": 84.303, "step": 101510 }, { "epoch": 0.839806427596476, "grad_norm": 661.683349609375, "learning_rate": 9.511878195017499e-07, "loss": 71.9918, "step": 101520 }, { "epoch": 0.8398891508458453, "grad_norm": 1106.2100830078125, "learning_rate": 9.503604366072666e-07, "loss": 80.9662, "step": 101530 }, { "epoch": 0.8399718740952145, "grad_norm": 1053.9813232421875, "learning_rate": 9.495333759240171e-07, "loss": 99.4201, "step": 101540 }, { "epoch": 0.8400545973445837, "grad_norm": 1770.6314697265625, "learning_rate": 9.487066375178078e-07, "loss": 97.2852, "step": 101550 }, { "epoch": 0.840137320593953, "grad_norm": 831.5025634765625, "learning_rate": 9.47880221454418e-07, "loss": 87.7667, "step": 101560 }, { "epoch": 0.8402200438433222, "grad_norm": 619.336181640625, "learning_rate": 9.470541277996026e-07, "loss": 105.6826, "step": 101570 }, { "epoch": 0.8403027670926914, "grad_norm": 590.185302734375, "learning_rate": 9.462283566190894e-07, "loss": 116.2632, "step": 101580 }, { "epoch": 0.8403854903420607, "grad_norm": 652.5518798828125, "learning_rate": 9.454029079785809e-07, "loss": 90.1779, "step": 101590 }, { "epoch": 0.8404682135914299, "grad_norm": 699.6583251953125, "learning_rate": 9.445777819437557e-07, "loss": 91.0062, "step": 101600 }, { "epoch": 0.8405509368407991, "grad_norm": 601.4156494140625, "learning_rate": 9.437529785802647e-07, "loss": 69.9341, "step": 101610 }, { "epoch": 0.8406336600901684, "grad_norm": 309.7095642089844, "learning_rate": 9.429284979537346e-07, "loss": 72.0386, "step": 101620 }, { "epoch": 0.8407163833395376, "grad_norm": 1362.3778076171875, "learning_rate": 9.421043401297636e-07, "loss": 86.1247, "step": 101630 }, { "epoch": 0.8407991065889068, "grad_norm": 1056.7972412109375, "learning_rate": 9.412805051739266e-07, "loss": 94.5386, "step": 101640 }, { "epoch": 0.8408818298382761, "grad_norm": 394.115478515625, "learning_rate": 9.404569931517726e-07, "loss": 57.8183, "step": 101650 }, { "epoch": 0.8409645530876453, "grad_norm": 1883.9283447265625, "learning_rate": 9.396338041288255e-07, "loss": 83.5486, "step": 101660 }, { "epoch": 0.8410472763370145, "grad_norm": 773.7887573242188, "learning_rate": 9.388109381705817e-07, "loss": 64.771, "step": 101670 }, { "epoch": 0.8411299995863838, "grad_norm": 1021.2868041992188, "learning_rate": 9.379883953425134e-07, "loss": 84.9474, "step": 101680 }, { "epoch": 0.841212722835753, "grad_norm": 808.8427734375, "learning_rate": 9.371661757100648e-07, "loss": 94.7058, "step": 101690 }, { "epoch": 0.8412954460851222, "grad_norm": 811.6118774414062, "learning_rate": 9.363442793386606e-07, "loss": 95.701, "step": 101700 }, { "epoch": 0.8413781693344915, "grad_norm": 923.9641723632812, "learning_rate": 9.355227062936912e-07, "loss": 101.2932, "step": 101710 }, { "epoch": 0.8414608925838607, "grad_norm": 789.2193603515625, "learning_rate": 9.34701456640526e-07, "loss": 69.8859, "step": 101720 }, { "epoch": 0.8415436158332299, "grad_norm": 676.7222900390625, "learning_rate": 9.338805304445092e-07, "loss": 77.5232, "step": 101730 }, { "epoch": 0.8416263390825992, "grad_norm": 793.80615234375, "learning_rate": 9.330599277709579e-07, "loss": 83.3514, "step": 101740 }, { "epoch": 0.8417090623319684, "grad_norm": 857.0948486328125, "learning_rate": 9.322396486851626e-07, "loss": 94.8482, "step": 101750 }, { "epoch": 0.8417917855813376, "grad_norm": 867.2514038085938, "learning_rate": 9.314196932523906e-07, "loss": 96.5802, "step": 101760 }, { "epoch": 0.8418745088307069, "grad_norm": 1006.9816284179688, "learning_rate": 9.306000615378813e-07, "loss": 89.8353, "step": 101770 }, { "epoch": 0.8419572320800761, "grad_norm": 811.1846923828125, "learning_rate": 9.297807536068476e-07, "loss": 98.205, "step": 101780 }, { "epoch": 0.8420399553294453, "grad_norm": 760.5066528320312, "learning_rate": 9.289617695244818e-07, "loss": 109.3908, "step": 101790 }, { "epoch": 0.8421226785788146, "grad_norm": 855.790771484375, "learning_rate": 9.281431093559439e-07, "loss": 89.0774, "step": 101800 }, { "epoch": 0.8422054018281838, "grad_norm": 901.5059814453125, "learning_rate": 9.273247731663709e-07, "loss": 77.9359, "step": 101810 }, { "epoch": 0.842288125077553, "grad_norm": 856.0442504882812, "learning_rate": 9.26506761020875e-07, "loss": 103.1285, "step": 101820 }, { "epoch": 0.8423708483269223, "grad_norm": 919.891357421875, "learning_rate": 9.256890729845414e-07, "loss": 96.1979, "step": 101830 }, { "epoch": 0.8424535715762915, "grad_norm": 742.4312133789062, "learning_rate": 9.248717091224291e-07, "loss": 88.905, "step": 101840 }, { "epoch": 0.8425362948256607, "grad_norm": 697.6671142578125, "learning_rate": 9.240546694995733e-07, "loss": 71.3341, "step": 101850 }, { "epoch": 0.84261901807503, "grad_norm": 611.2695922851562, "learning_rate": 9.23237954180981e-07, "loss": 75.6786, "step": 101860 }, { "epoch": 0.8427017413243992, "grad_norm": 730.123779296875, "learning_rate": 9.224215632316346e-07, "loss": 69.9281, "step": 101870 }, { "epoch": 0.8427844645737684, "grad_norm": 824.8983764648438, "learning_rate": 9.216054967164916e-07, "loss": 95.3742, "step": 101880 }, { "epoch": 0.8428671878231377, "grad_norm": 783.8893432617188, "learning_rate": 9.207897547004812e-07, "loss": 81.9008, "step": 101890 }, { "epoch": 0.8429499110725069, "grad_norm": 996.7938842773438, "learning_rate": 9.199743372485093e-07, "loss": 117.712, "step": 101900 }, { "epoch": 0.8430326343218761, "grad_norm": 677.0623779296875, "learning_rate": 9.191592444254549e-07, "loss": 76.7643, "step": 101910 }, { "epoch": 0.8431153575712454, "grad_norm": 1035.1934814453125, "learning_rate": 9.183444762961702e-07, "loss": 91.2087, "step": 101920 }, { "epoch": 0.8431980808206146, "grad_norm": 940.440185546875, "learning_rate": 9.175300329254839e-07, "loss": 106.9464, "step": 101930 }, { "epoch": 0.8432808040699838, "grad_norm": 685.4142456054688, "learning_rate": 9.167159143781967e-07, "loss": 85.0712, "step": 101940 }, { "epoch": 0.8433635273193532, "grad_norm": 931.5978393554688, "learning_rate": 9.159021207190843e-07, "loss": 80.0363, "step": 101950 }, { "epoch": 0.8434462505687224, "grad_norm": 683.4605102539062, "learning_rate": 9.150886520128966e-07, "loss": 107.6678, "step": 101960 }, { "epoch": 0.8435289738180916, "grad_norm": 1009.03369140625, "learning_rate": 9.142755083243577e-07, "loss": 84.9778, "step": 101970 }, { "epoch": 0.8436116970674609, "grad_norm": 847.3112182617188, "learning_rate": 9.134626897181659e-07, "loss": 75.6202, "step": 101980 }, { "epoch": 0.8436944203168301, "grad_norm": 609.8668823242188, "learning_rate": 9.126501962589928e-07, "loss": 90.4189, "step": 101990 }, { "epoch": 0.8437771435661993, "grad_norm": 860.5015869140625, "learning_rate": 9.118380280114858e-07, "loss": 82.1258, "step": 102000 }, { "epoch": 0.8438598668155685, "grad_norm": 771.5186157226562, "learning_rate": 9.110261850402641e-07, "loss": 77.5802, "step": 102010 }, { "epoch": 0.8439425900649378, "grad_norm": 814.3161010742188, "learning_rate": 9.102146674099232e-07, "loss": 74.584, "step": 102020 }, { "epoch": 0.844025313314307, "grad_norm": 399.5432434082031, "learning_rate": 9.094034751850317e-07, "loss": 119.564, "step": 102030 }, { "epoch": 0.8441080365636762, "grad_norm": 472.89300537109375, "learning_rate": 9.085926084301327e-07, "loss": 73.9548, "step": 102040 }, { "epoch": 0.8441907598130455, "grad_norm": 1127.3804931640625, "learning_rate": 9.077820672097426e-07, "loss": 69.2523, "step": 102050 }, { "epoch": 0.8442734830624147, "grad_norm": 555.0116577148438, "learning_rate": 9.069718515883524e-07, "loss": 92.0813, "step": 102060 }, { "epoch": 0.8443562063117839, "grad_norm": 739.4345703125, "learning_rate": 9.06161961630428e-07, "loss": 74.4284, "step": 102070 }, { "epoch": 0.8444389295611532, "grad_norm": 1793.3900146484375, "learning_rate": 9.053523974004075e-07, "loss": 115.2249, "step": 102080 }, { "epoch": 0.8445216528105224, "grad_norm": 1008.2157592773438, "learning_rate": 9.045431589627052e-07, "loss": 80.6738, "step": 102090 }, { "epoch": 0.8446043760598916, "grad_norm": 867.9142456054688, "learning_rate": 9.037342463817084e-07, "loss": 84.0623, "step": 102100 }, { "epoch": 0.8446870993092609, "grad_norm": 1016.9595336914062, "learning_rate": 9.029256597217778e-07, "loss": 97.6914, "step": 102110 }, { "epoch": 0.8447698225586301, "grad_norm": 538.5012817382812, "learning_rate": 9.021173990472498e-07, "loss": 72.0225, "step": 102120 }, { "epoch": 0.8448525458079993, "grad_norm": 910.0997314453125, "learning_rate": 9.013094644224346e-07, "loss": 87.6451, "step": 102130 }, { "epoch": 0.8449352690573686, "grad_norm": 567.3370361328125, "learning_rate": 9.005018559116135e-07, "loss": 68.3961, "step": 102140 }, { "epoch": 0.8450179923067378, "grad_norm": 592.2027587890625, "learning_rate": 8.996945735790447e-07, "loss": 95.2749, "step": 102150 }, { "epoch": 0.845100715556107, "grad_norm": 781.4312133789062, "learning_rate": 8.988876174889616e-07, "loss": 62.1267, "step": 102160 }, { "epoch": 0.8451834388054763, "grad_norm": 1090.7720947265625, "learning_rate": 8.980809877055696e-07, "loss": 76.1743, "step": 102170 }, { "epoch": 0.8452661620548455, "grad_norm": 1055.83447265625, "learning_rate": 8.97274684293048e-07, "loss": 90.5386, "step": 102180 }, { "epoch": 0.8453488853042147, "grad_norm": 443.5655822753906, "learning_rate": 8.964687073155509e-07, "loss": 100.6941, "step": 102190 }, { "epoch": 0.845431608553584, "grad_norm": 919.127685546875, "learning_rate": 8.95663056837206e-07, "loss": 86.7603, "step": 102200 }, { "epoch": 0.8455143318029532, "grad_norm": 703.267822265625, "learning_rate": 8.948577329221153e-07, "loss": 92.9238, "step": 102210 }, { "epoch": 0.8455970550523224, "grad_norm": 597.8628540039062, "learning_rate": 8.940527356343564e-07, "loss": 80.7923, "step": 102220 }, { "epoch": 0.8456797783016917, "grad_norm": 985.700927734375, "learning_rate": 8.93248065037976e-07, "loss": 72.7414, "step": 102230 }, { "epoch": 0.8457625015510609, "grad_norm": 608.7626953125, "learning_rate": 8.924437211969983e-07, "loss": 95.9518, "step": 102240 }, { "epoch": 0.8458452248004301, "grad_norm": 734.87841796875, "learning_rate": 8.916397041754238e-07, "loss": 93.5361, "step": 102250 }, { "epoch": 0.8459279480497994, "grad_norm": 707.2564697265625, "learning_rate": 8.90836014037223e-07, "loss": 87.7497, "step": 102260 }, { "epoch": 0.8460106712991686, "grad_norm": 466.18157958984375, "learning_rate": 8.900326508463425e-07, "loss": 99.7546, "step": 102270 }, { "epoch": 0.8460933945485378, "grad_norm": 665.1851806640625, "learning_rate": 8.892296146667018e-07, "loss": 98.5748, "step": 102280 }, { "epoch": 0.8461761177979071, "grad_norm": 1132.5751953125, "learning_rate": 8.884269055621941e-07, "loss": 91.5124, "step": 102290 }, { "epoch": 0.8462588410472763, "grad_norm": 954.9869384765625, "learning_rate": 8.876245235966884e-07, "loss": 124.0621, "step": 102300 }, { "epoch": 0.8463415642966455, "grad_norm": 1059.5760498046875, "learning_rate": 8.868224688340277e-07, "loss": 87.5652, "step": 102310 }, { "epoch": 0.8464242875460148, "grad_norm": 886.4304809570312, "learning_rate": 8.860207413380245e-07, "loss": 93.7244, "step": 102320 }, { "epoch": 0.846507010795384, "grad_norm": 954.0653076171875, "learning_rate": 8.852193411724702e-07, "loss": 94.8709, "step": 102330 }, { "epoch": 0.8465897340447532, "grad_norm": 650.5189208984375, "learning_rate": 8.844182684011276e-07, "loss": 68.5645, "step": 102340 }, { "epoch": 0.8466724572941225, "grad_norm": 680.576904296875, "learning_rate": 8.83617523087737e-07, "loss": 66.5604, "step": 102350 }, { "epoch": 0.8467551805434917, "grad_norm": 767.3767700195312, "learning_rate": 8.828171052960077e-07, "loss": 114.503, "step": 102360 }, { "epoch": 0.846837903792861, "grad_norm": 712.951171875, "learning_rate": 8.820170150896268e-07, "loss": 114.4278, "step": 102370 }, { "epoch": 0.8469206270422303, "grad_norm": 753.8101806640625, "learning_rate": 8.812172525322527e-07, "loss": 66.3853, "step": 102380 }, { "epoch": 0.8470033502915995, "grad_norm": 927.0662231445312, "learning_rate": 8.8041781768752e-07, "loss": 63.6904, "step": 102390 }, { "epoch": 0.8470860735409687, "grad_norm": 1022.492919921875, "learning_rate": 8.796187106190346e-07, "loss": 80.9226, "step": 102400 }, { "epoch": 0.847168796790338, "grad_norm": 647.39697265625, "learning_rate": 8.788199313903778e-07, "loss": 103.7165, "step": 102410 }, { "epoch": 0.8472515200397072, "grad_norm": 835.044189453125, "learning_rate": 8.78021480065106e-07, "loss": 99.1959, "step": 102420 }, { "epoch": 0.8473342432890764, "grad_norm": 1518.077880859375, "learning_rate": 8.772233567067473e-07, "loss": 88.389, "step": 102430 }, { "epoch": 0.8474169665384457, "grad_norm": 717.4688110351562, "learning_rate": 8.764255613788037e-07, "loss": 77.5931, "step": 102440 }, { "epoch": 0.8474996897878149, "grad_norm": 487.7947998046875, "learning_rate": 8.756280941447554e-07, "loss": 83.3511, "step": 102450 }, { "epoch": 0.8475824130371841, "grad_norm": 1047.3568115234375, "learning_rate": 8.748309550680506e-07, "loss": 115.5404, "step": 102460 }, { "epoch": 0.8476651362865534, "grad_norm": 601.6625366210938, "learning_rate": 8.740341442121153e-07, "loss": 80.3495, "step": 102470 }, { "epoch": 0.8477478595359226, "grad_norm": 533.0794067382812, "learning_rate": 8.732376616403487e-07, "loss": 76.5713, "step": 102480 }, { "epoch": 0.8478305827852918, "grad_norm": 918.6082153320312, "learning_rate": 8.724415074161207e-07, "loss": 88.8103, "step": 102490 }, { "epoch": 0.8479133060346611, "grad_norm": 590.0209350585938, "learning_rate": 8.716456816027791e-07, "loss": 80.2414, "step": 102500 }, { "epoch": 0.8479960292840303, "grad_norm": 869.5015869140625, "learning_rate": 8.708501842636441e-07, "loss": 91.9651, "step": 102510 }, { "epoch": 0.8480787525333995, "grad_norm": 636.9760131835938, "learning_rate": 8.700550154620091e-07, "loss": 70.51, "step": 102520 }, { "epoch": 0.8481614757827688, "grad_norm": 512.7169189453125, "learning_rate": 8.692601752611435e-07, "loss": 74.1003, "step": 102530 }, { "epoch": 0.848244199032138, "grad_norm": 1306.0889892578125, "learning_rate": 8.684656637242866e-07, "loss": 87.0951, "step": 102540 }, { "epoch": 0.8483269222815072, "grad_norm": 2022.1649169921875, "learning_rate": 8.676714809146569e-07, "loss": 118.8545, "step": 102550 }, { "epoch": 0.8484096455308765, "grad_norm": 810.895751953125, "learning_rate": 8.668776268954437e-07, "loss": 87.6652, "step": 102560 }, { "epoch": 0.8484923687802457, "grad_norm": 636.8590698242188, "learning_rate": 8.660841017298082e-07, "loss": 66.9613, "step": 102570 }, { "epoch": 0.8485750920296149, "grad_norm": 1104.5308837890625, "learning_rate": 8.652909054808884e-07, "loss": 99.1564, "step": 102580 }, { "epoch": 0.8486578152789842, "grad_norm": 802.7866821289062, "learning_rate": 8.644980382117956e-07, "loss": 89.1793, "step": 102590 }, { "epoch": 0.8487405385283534, "grad_norm": 734.8566284179688, "learning_rate": 8.637054999856148e-07, "loss": 60.249, "step": 102600 }, { "epoch": 0.8488232617777226, "grad_norm": 743.7099609375, "learning_rate": 8.629132908654042e-07, "loss": 92.0852, "step": 102610 }, { "epoch": 0.8489059850270919, "grad_norm": 810.8662719726562, "learning_rate": 8.621214109141962e-07, "loss": 84.611, "step": 102620 }, { "epoch": 0.8489887082764611, "grad_norm": 723.8987426757812, "learning_rate": 8.613298601949971e-07, "loss": 98.8016, "step": 102630 }, { "epoch": 0.8490714315258303, "grad_norm": 1335.8057861328125, "learning_rate": 8.605386387707865e-07, "loss": 79.3897, "step": 102640 }, { "epoch": 0.8491541547751996, "grad_norm": 916.5572509765625, "learning_rate": 8.597477467045207e-07, "loss": 84.2059, "step": 102650 }, { "epoch": 0.8492368780245688, "grad_norm": 1256.606689453125, "learning_rate": 8.589571840591243e-07, "loss": 95.1835, "step": 102660 }, { "epoch": 0.849319601273938, "grad_norm": 850.797119140625, "learning_rate": 8.581669508975005e-07, "loss": 84.3254, "step": 102670 }, { "epoch": 0.8494023245233073, "grad_norm": 709.500244140625, "learning_rate": 8.573770472825233e-07, "loss": 73.6143, "step": 102680 }, { "epoch": 0.8494850477726765, "grad_norm": 595.16650390625, "learning_rate": 8.565874732770429e-07, "loss": 72.705, "step": 102690 }, { "epoch": 0.8495677710220457, "grad_norm": 871.81005859375, "learning_rate": 8.55798228943881e-07, "loss": 92.2849, "step": 102700 }, { "epoch": 0.849650494271415, "grad_norm": 950.0880737304688, "learning_rate": 8.550093143458355e-07, "loss": 90.0403, "step": 102710 }, { "epoch": 0.8497332175207842, "grad_norm": 855.10888671875, "learning_rate": 8.542207295456751e-07, "loss": 91.0368, "step": 102720 }, { "epoch": 0.8498159407701534, "grad_norm": 846.3700561523438, "learning_rate": 8.53432474606144e-07, "loss": 101.0878, "step": 102730 }, { "epoch": 0.8498986640195226, "grad_norm": 732.4088745117188, "learning_rate": 8.526445495899627e-07, "loss": 95.0499, "step": 102740 }, { "epoch": 0.8499813872688919, "grad_norm": 723.0433349609375, "learning_rate": 8.518569545598198e-07, "loss": 65.2936, "step": 102750 }, { "epoch": 0.8500641105182611, "grad_norm": 791.6629638671875, "learning_rate": 8.510696895783821e-07, "loss": 92.8047, "step": 102760 }, { "epoch": 0.8501468337676303, "grad_norm": 669.09521484375, "learning_rate": 8.502827547082876e-07, "loss": 79.3404, "step": 102770 }, { "epoch": 0.8502295570169996, "grad_norm": 292.19622802734375, "learning_rate": 8.494961500121501e-07, "loss": 74.0627, "step": 102780 }, { "epoch": 0.8503122802663688, "grad_norm": 881.7793579101562, "learning_rate": 8.487098755525552e-07, "loss": 85.533, "step": 102790 }, { "epoch": 0.850395003515738, "grad_norm": 861.0848999023438, "learning_rate": 8.47923931392064e-07, "loss": 66.5782, "step": 102800 }, { "epoch": 0.8504777267651074, "grad_norm": 857.7160034179688, "learning_rate": 8.471383175932102e-07, "loss": 99.694, "step": 102810 }, { "epoch": 0.8505604500144766, "grad_norm": 544.4356689453125, "learning_rate": 8.463530342185011e-07, "loss": 83.8572, "step": 102820 }, { "epoch": 0.8506431732638458, "grad_norm": 837.5228881835938, "learning_rate": 8.455680813304185e-07, "loss": 65.1481, "step": 102830 }, { "epoch": 0.8507258965132151, "grad_norm": 898.0479736328125, "learning_rate": 8.447834589914172e-07, "loss": 82.4079, "step": 102840 }, { "epoch": 0.8508086197625843, "grad_norm": 715.7451782226562, "learning_rate": 8.439991672639264e-07, "loss": 89.7485, "step": 102850 }, { "epoch": 0.8508913430119535, "grad_norm": 1106.7410888671875, "learning_rate": 8.432152062103488e-07, "loss": 75.6558, "step": 102860 }, { "epoch": 0.8509740662613228, "grad_norm": 957.0466918945312, "learning_rate": 8.424315758930596e-07, "loss": 94.4857, "step": 102870 }, { "epoch": 0.851056789510692, "grad_norm": 787.596435546875, "learning_rate": 8.416482763744093e-07, "loss": 130.3268, "step": 102880 }, { "epoch": 0.8511395127600612, "grad_norm": 982.1759033203125, "learning_rate": 8.408653077167217e-07, "loss": 101.9882, "step": 102890 }, { "epoch": 0.8512222360094305, "grad_norm": 746.855224609375, "learning_rate": 8.400826699822933e-07, "loss": 100.449, "step": 102900 }, { "epoch": 0.8513049592587997, "grad_norm": 440.57867431640625, "learning_rate": 8.393003632333957e-07, "loss": 70.7138, "step": 102910 }, { "epoch": 0.8513876825081689, "grad_norm": 402.388427734375, "learning_rate": 8.385183875322733e-07, "loss": 111.9875, "step": 102920 }, { "epoch": 0.8514704057575382, "grad_norm": 730.9739990234375, "learning_rate": 8.377367429411443e-07, "loss": 68.7929, "step": 102930 }, { "epoch": 0.8515531290069074, "grad_norm": 1171.7159423828125, "learning_rate": 8.369554295221999e-07, "loss": 123.8448, "step": 102940 }, { "epoch": 0.8516358522562766, "grad_norm": 828.75048828125, "learning_rate": 8.361744473376066e-07, "loss": 78.1169, "step": 102950 }, { "epoch": 0.8517185755056459, "grad_norm": 694.2513427734375, "learning_rate": 8.353937964495029e-07, "loss": 103.3453, "step": 102960 }, { "epoch": 0.8518012987550151, "grad_norm": 1413.7930908203125, "learning_rate": 8.346134769200021e-07, "loss": 83.3088, "step": 102970 }, { "epoch": 0.8518840220043843, "grad_norm": 860.365234375, "learning_rate": 8.338334888111899e-07, "loss": 77.3941, "step": 102980 }, { "epoch": 0.8519667452537536, "grad_norm": 750.07177734375, "learning_rate": 8.330538321851284e-07, "loss": 126.0141, "step": 102990 }, { "epoch": 0.8520494685031228, "grad_norm": 850.6483764648438, "learning_rate": 8.322745071038474e-07, "loss": 96.3906, "step": 103000 }, { "epoch": 0.852132191752492, "grad_norm": 1176.2943115234375, "learning_rate": 8.314955136293579e-07, "loss": 80.5075, "step": 103010 }, { "epoch": 0.8522149150018613, "grad_norm": 1178.8140869140625, "learning_rate": 8.307168518236391e-07, "loss": 113.7988, "step": 103020 }, { "epoch": 0.8522976382512305, "grad_norm": 824.7714233398438, "learning_rate": 8.299385217486466e-07, "loss": 103.9782, "step": 103030 }, { "epoch": 0.8523803615005997, "grad_norm": 816.1295776367188, "learning_rate": 8.291605234663075e-07, "loss": 102.6652, "step": 103040 }, { "epoch": 0.852463084749969, "grad_norm": 942.5098876953125, "learning_rate": 8.283828570385239e-07, "loss": 83.2088, "step": 103050 }, { "epoch": 0.8525458079993382, "grad_norm": 1104.326904296875, "learning_rate": 8.276055225271718e-07, "loss": 79.5488, "step": 103060 }, { "epoch": 0.8526285312487074, "grad_norm": 845.7022705078125, "learning_rate": 8.26828519994099e-07, "loss": 79.7386, "step": 103070 }, { "epoch": 0.8527112544980767, "grad_norm": 627.2059326171875, "learning_rate": 8.260518495011299e-07, "loss": 104.046, "step": 103080 }, { "epoch": 0.8527939777474459, "grad_norm": 1893.8099365234375, "learning_rate": 8.25275511110058e-07, "loss": 90.7877, "step": 103090 }, { "epoch": 0.8528767009968151, "grad_norm": 1187.659912109375, "learning_rate": 8.244995048826532e-07, "loss": 103.0294, "step": 103100 }, { "epoch": 0.8529594242461844, "grad_norm": 673.2490844726562, "learning_rate": 8.237238308806611e-07, "loss": 96.1213, "step": 103110 }, { "epoch": 0.8530421474955536, "grad_norm": 440.6217041015625, "learning_rate": 8.229484891657974e-07, "loss": 80.7047, "step": 103120 }, { "epoch": 0.8531248707449228, "grad_norm": 816.9833984375, "learning_rate": 8.221734797997522e-07, "loss": 72.0308, "step": 103130 }, { "epoch": 0.8532075939942921, "grad_norm": 1029.72900390625, "learning_rate": 8.213988028441893e-07, "loss": 90.9839, "step": 103140 }, { "epoch": 0.8532903172436613, "grad_norm": 952.5328979492188, "learning_rate": 8.20624458360747e-07, "loss": 89.0053, "step": 103150 }, { "epoch": 0.8533730404930305, "grad_norm": 1081.2255859375, "learning_rate": 8.198504464110358e-07, "loss": 122.348, "step": 103160 }, { "epoch": 0.8534557637423998, "grad_norm": 1057.4853515625, "learning_rate": 8.190767670566407e-07, "loss": 92.9125, "step": 103170 }, { "epoch": 0.853538486991769, "grad_norm": 975.9583129882812, "learning_rate": 8.183034203591189e-07, "loss": 117.357, "step": 103180 }, { "epoch": 0.8536212102411382, "grad_norm": 674.9142456054688, "learning_rate": 8.175304063800021e-07, "loss": 82.4939, "step": 103190 }, { "epoch": 0.8537039334905075, "grad_norm": 896.0360717773438, "learning_rate": 8.167577251807951e-07, "loss": 71.3201, "step": 103200 }, { "epoch": 0.8537866567398767, "grad_norm": 779.8267211914062, "learning_rate": 8.159853768229786e-07, "loss": 78.3528, "step": 103210 }, { "epoch": 0.853869379989246, "grad_norm": 607.8330078125, "learning_rate": 8.152133613680035e-07, "loss": 90.1494, "step": 103220 }, { "epoch": 0.8539521032386153, "grad_norm": 634.585205078125, "learning_rate": 8.144416788772957e-07, "loss": 72.4637, "step": 103230 }, { "epoch": 0.8540348264879845, "grad_norm": 672.9807739257812, "learning_rate": 8.136703294122544e-07, "loss": 74.1094, "step": 103240 }, { "epoch": 0.8541175497373537, "grad_norm": 954.1567993164062, "learning_rate": 8.128993130342538e-07, "loss": 98.1181, "step": 103250 }, { "epoch": 0.854200272986723, "grad_norm": 597.5408935546875, "learning_rate": 8.121286298046372e-07, "loss": 86.6912, "step": 103260 }, { "epoch": 0.8542829962360922, "grad_norm": 522.8579711914062, "learning_rate": 8.113582797847252e-07, "loss": 96.0704, "step": 103270 }, { "epoch": 0.8543657194854614, "grad_norm": 649.9797973632812, "learning_rate": 8.105882630358125e-07, "loss": 78.3974, "step": 103280 }, { "epoch": 0.8544484427348307, "grad_norm": 538.1500244140625, "learning_rate": 8.098185796191632e-07, "loss": 62.7917, "step": 103290 }, { "epoch": 0.8545311659841999, "grad_norm": 425.60430908203125, "learning_rate": 8.090492295960206e-07, "loss": 67.8472, "step": 103300 }, { "epoch": 0.8546138892335691, "grad_norm": 498.4320068359375, "learning_rate": 8.082802130275968e-07, "loss": 80.5484, "step": 103310 }, { "epoch": 0.8546966124829384, "grad_norm": 679.7952880859375, "learning_rate": 8.075115299750797e-07, "loss": 92.3465, "step": 103320 }, { "epoch": 0.8547793357323076, "grad_norm": 643.3114013671875, "learning_rate": 8.067431804996284e-07, "loss": 77.1838, "step": 103330 }, { "epoch": 0.8548620589816768, "grad_norm": 746.61865234375, "learning_rate": 8.059751646623792e-07, "loss": 91.1856, "step": 103340 }, { "epoch": 0.8549447822310461, "grad_norm": 1265.4454345703125, "learning_rate": 8.052074825244371e-07, "loss": 99.7866, "step": 103350 }, { "epoch": 0.8550275054804153, "grad_norm": 791.6976318359375, "learning_rate": 8.044401341468839e-07, "loss": 75.7532, "step": 103360 }, { "epoch": 0.8551102287297845, "grad_norm": 963.1607055664062, "learning_rate": 8.036731195907743e-07, "loss": 87.9717, "step": 103370 }, { "epoch": 0.8551929519791538, "grad_norm": 702.797119140625, "learning_rate": 8.029064389171365e-07, "loss": 77.9525, "step": 103380 }, { "epoch": 0.855275675228523, "grad_norm": 794.2588500976562, "learning_rate": 8.021400921869693e-07, "loss": 62.4597, "step": 103390 }, { "epoch": 0.8553583984778922, "grad_norm": 654.7899780273438, "learning_rate": 8.013740794612512e-07, "loss": 90.647, "step": 103400 }, { "epoch": 0.8554411217272615, "grad_norm": 317.69580078125, "learning_rate": 8.006084008009285e-07, "loss": 68.5561, "step": 103410 }, { "epoch": 0.8555238449766307, "grad_norm": 737.1478271484375, "learning_rate": 7.998430562669234e-07, "loss": 94.218, "step": 103420 }, { "epoch": 0.8556065682259999, "grad_norm": 625.2897338867188, "learning_rate": 7.990780459201291e-07, "loss": 68.3049, "step": 103430 }, { "epoch": 0.8556892914753692, "grad_norm": 955.1998291015625, "learning_rate": 7.983133698214158e-07, "loss": 98.8367, "step": 103440 }, { "epoch": 0.8557720147247384, "grad_norm": 1487.3785400390625, "learning_rate": 7.975490280316239e-07, "loss": 124.9303, "step": 103450 }, { "epoch": 0.8558547379741076, "grad_norm": 822.9546508789062, "learning_rate": 7.96785020611569e-07, "loss": 81.8008, "step": 103460 }, { "epoch": 0.8559374612234768, "grad_norm": 357.85772705078125, "learning_rate": 7.960213476220402e-07, "loss": 73.6397, "step": 103470 }, { "epoch": 0.8560201844728461, "grad_norm": 1243.547119140625, "learning_rate": 7.952580091237993e-07, "loss": 73.8588, "step": 103480 }, { "epoch": 0.8561029077222153, "grad_norm": 1284.897705078125, "learning_rate": 7.944950051775802e-07, "loss": 114.3679, "step": 103490 }, { "epoch": 0.8561856309715845, "grad_norm": 482.4344482421875, "learning_rate": 7.937323358440935e-07, "loss": 61.2101, "step": 103500 }, { "epoch": 0.8562683542209538, "grad_norm": 644.4739990234375, "learning_rate": 7.929700011840225e-07, "loss": 59.5683, "step": 103510 }, { "epoch": 0.856351077470323, "grad_norm": 894.75341796875, "learning_rate": 7.922080012580191e-07, "loss": 64.3507, "step": 103520 }, { "epoch": 0.8564338007196922, "grad_norm": 336.1860046386719, "learning_rate": 7.914463361267144e-07, "loss": 77.5185, "step": 103530 }, { "epoch": 0.8565165239690615, "grad_norm": 769.1115112304688, "learning_rate": 7.906850058507098e-07, "loss": 70.1408, "step": 103540 }, { "epoch": 0.8565992472184307, "grad_norm": 759.4151000976562, "learning_rate": 7.899240104905814e-07, "loss": 62.1766, "step": 103550 }, { "epoch": 0.8566819704677999, "grad_norm": 1259.5947265625, "learning_rate": 7.891633501068774e-07, "loss": 83.4048, "step": 103560 }, { "epoch": 0.8567646937171692, "grad_norm": 948.8060302734375, "learning_rate": 7.88403024760121e-07, "loss": 82.6916, "step": 103570 }, { "epoch": 0.8568474169665384, "grad_norm": 881.0368041992188, "learning_rate": 7.876430345108072e-07, "loss": 92.3865, "step": 103580 }, { "epoch": 0.8569301402159076, "grad_norm": 609.7398681640625, "learning_rate": 7.868833794194048e-07, "loss": 55.702, "step": 103590 }, { "epoch": 0.8570128634652769, "grad_norm": 967.415771484375, "learning_rate": 7.861240595463565e-07, "loss": 66.2445, "step": 103600 }, { "epoch": 0.8570955867146461, "grad_norm": 916.1266479492188, "learning_rate": 7.853650749520775e-07, "loss": 95.8712, "step": 103610 }, { "epoch": 0.8571783099640153, "grad_norm": 1089.77783203125, "learning_rate": 7.846064256969571e-07, "loss": 96.1446, "step": 103620 }, { "epoch": 0.8572610332133846, "grad_norm": 1048.322265625, "learning_rate": 7.838481118413571e-07, "loss": 81.8681, "step": 103630 }, { "epoch": 0.8573437564627538, "grad_norm": 375.7430419921875, "learning_rate": 7.830901334456137e-07, "loss": 88.0491, "step": 103640 }, { "epoch": 0.857426479712123, "grad_norm": 1264.2864990234375, "learning_rate": 7.823324905700352e-07, "loss": 87.9994, "step": 103650 }, { "epoch": 0.8575092029614924, "grad_norm": 961.72216796875, "learning_rate": 7.815751832749035e-07, "loss": 85.5537, "step": 103660 }, { "epoch": 0.8575919262108616, "grad_norm": 1320.927978515625, "learning_rate": 7.808182116204755e-07, "loss": 114.779, "step": 103670 }, { "epoch": 0.8576746494602308, "grad_norm": 738.6021728515625, "learning_rate": 7.800615756669783e-07, "loss": 100.2147, "step": 103680 }, { "epoch": 0.8577573727096001, "grad_norm": 623.4457397460938, "learning_rate": 7.793052754746144e-07, "loss": 89.7, "step": 103690 }, { "epoch": 0.8578400959589693, "grad_norm": 870.4207763671875, "learning_rate": 7.785493111035597e-07, "loss": 102.5554, "step": 103700 }, { "epoch": 0.8579228192083385, "grad_norm": 1017.7730102539062, "learning_rate": 7.777936826139626e-07, "loss": 99.2322, "step": 103710 }, { "epoch": 0.8580055424577078, "grad_norm": 630.4139404296875, "learning_rate": 7.770383900659451e-07, "loss": 73.928, "step": 103720 }, { "epoch": 0.858088265707077, "grad_norm": 926.4790649414062, "learning_rate": 7.762834335196013e-07, "loss": 78.6408, "step": 103730 }, { "epoch": 0.8581709889564462, "grad_norm": 1010.821044921875, "learning_rate": 7.755288130350008e-07, "loss": 70.0562, "step": 103740 }, { "epoch": 0.8582537122058155, "grad_norm": 1223.12060546875, "learning_rate": 7.747745286721852e-07, "loss": 103.973, "step": 103750 }, { "epoch": 0.8583364354551847, "grad_norm": 652.41796875, "learning_rate": 7.740205804911693e-07, "loss": 83.6676, "step": 103760 }, { "epoch": 0.8584191587045539, "grad_norm": 588.4229736328125, "learning_rate": 7.732669685519406e-07, "loss": 99.9844, "step": 103770 }, { "epoch": 0.8585018819539232, "grad_norm": 514.46728515625, "learning_rate": 7.725136929144617e-07, "loss": 77.1376, "step": 103780 }, { "epoch": 0.8585846052032924, "grad_norm": 480.8714294433594, "learning_rate": 7.717607536386662e-07, "loss": 62.5537, "step": 103790 }, { "epoch": 0.8586673284526616, "grad_norm": 789.140380859375, "learning_rate": 7.71008150784463e-07, "loss": 91.3066, "step": 103800 }, { "epoch": 0.8587500517020309, "grad_norm": 514.1470947265625, "learning_rate": 7.702558844117325e-07, "loss": 103.8474, "step": 103810 }, { "epoch": 0.8588327749514001, "grad_norm": 347.6160888671875, "learning_rate": 7.695039545803295e-07, "loss": 95.0633, "step": 103820 }, { "epoch": 0.8589154982007693, "grad_norm": 526.0089721679688, "learning_rate": 7.687523613500814e-07, "loss": 83.8635, "step": 103830 }, { "epoch": 0.8589982214501386, "grad_norm": 982.0831909179688, "learning_rate": 7.680011047807894e-07, "loss": 118.7206, "step": 103840 }, { "epoch": 0.8590809446995078, "grad_norm": 723.9911499023438, "learning_rate": 7.672501849322266e-07, "loss": 83.4975, "step": 103850 }, { "epoch": 0.859163667948877, "grad_norm": 953.7279663085938, "learning_rate": 7.664996018641413e-07, "loss": 72.9386, "step": 103860 }, { "epoch": 0.8592463911982463, "grad_norm": 819.978271484375, "learning_rate": 7.657493556362539e-07, "loss": 84.9272, "step": 103870 }, { "epoch": 0.8593291144476155, "grad_norm": 1022.4807739257812, "learning_rate": 7.649994463082572e-07, "loss": 93.3535, "step": 103880 }, { "epoch": 0.8594118376969847, "grad_norm": 627.8077392578125, "learning_rate": 7.642498739398185e-07, "loss": 118.7882, "step": 103890 }, { "epoch": 0.859494560946354, "grad_norm": 927.2117309570312, "learning_rate": 7.63500638590578e-07, "loss": 72.8843, "step": 103900 }, { "epoch": 0.8595772841957232, "grad_norm": 905.3977661132812, "learning_rate": 7.62751740320149e-07, "loss": 96.9559, "step": 103910 }, { "epoch": 0.8596600074450924, "grad_norm": 507.61529541015625, "learning_rate": 7.620031791881172e-07, "loss": 89.9055, "step": 103920 }, { "epoch": 0.8597427306944617, "grad_norm": 831.4810180664062, "learning_rate": 7.612549552540426e-07, "loss": 68.1213, "step": 103930 }, { "epoch": 0.8598254539438309, "grad_norm": 1053.16943359375, "learning_rate": 7.605070685774596e-07, "loss": 113.8502, "step": 103940 }, { "epoch": 0.8599081771932001, "grad_norm": 908.6838989257812, "learning_rate": 7.597595192178702e-07, "loss": 100.7114, "step": 103950 }, { "epoch": 0.8599909004425694, "grad_norm": 800.2693481445312, "learning_rate": 7.590123072347566e-07, "loss": 98.1212, "step": 103960 }, { "epoch": 0.8600736236919386, "grad_norm": 559.2039794921875, "learning_rate": 7.582654326875705e-07, "loss": 84.4858, "step": 103970 }, { "epoch": 0.8601563469413078, "grad_norm": 799.5015258789062, "learning_rate": 7.575188956357371e-07, "loss": 88.0666, "step": 103980 }, { "epoch": 0.8602390701906771, "grad_norm": 413.585693359375, "learning_rate": 7.567726961386546e-07, "loss": 68.5193, "step": 103990 }, { "epoch": 0.8603217934400463, "grad_norm": 689.4683837890625, "learning_rate": 7.560268342556948e-07, "loss": 68.5218, "step": 104000 }, { "epoch": 0.8604045166894155, "grad_norm": 1106.870361328125, "learning_rate": 7.552813100462025e-07, "loss": 97.9523, "step": 104010 }, { "epoch": 0.8604872399387848, "grad_norm": 949.8282470703125, "learning_rate": 7.54536123569497e-07, "loss": 75.4276, "step": 104020 }, { "epoch": 0.860569963188154, "grad_norm": 1082.73828125, "learning_rate": 7.537912748848669e-07, "loss": 102.4075, "step": 104030 }, { "epoch": 0.8606526864375232, "grad_norm": 1304.8778076171875, "learning_rate": 7.530467640515782e-07, "loss": 101.8359, "step": 104040 }, { "epoch": 0.8607354096868925, "grad_norm": 732.41015625, "learning_rate": 7.523025911288656e-07, "loss": 90.7578, "step": 104050 }, { "epoch": 0.8608181329362617, "grad_norm": 1173.3736572265625, "learning_rate": 7.51558756175943e-07, "loss": 85.3348, "step": 104060 }, { "epoch": 0.860900856185631, "grad_norm": 673.0076904296875, "learning_rate": 7.508152592519924e-07, "loss": 98.3655, "step": 104070 }, { "epoch": 0.8609835794350003, "grad_norm": 3371.377197265625, "learning_rate": 7.500721004161709e-07, "loss": 105.9698, "step": 104080 }, { "epoch": 0.8610663026843695, "grad_norm": 945.2011108398438, "learning_rate": 7.493292797276075e-07, "loss": 106.223, "step": 104090 }, { "epoch": 0.8611490259337387, "grad_norm": 777.4385375976562, "learning_rate": 7.485867972454053e-07, "loss": 71.3168, "step": 104100 }, { "epoch": 0.861231749183108, "grad_norm": 1511.415283203125, "learning_rate": 7.478446530286415e-07, "loss": 109.106, "step": 104110 }, { "epoch": 0.8613144724324772, "grad_norm": 477.33001708984375, "learning_rate": 7.471028471363628e-07, "loss": 119.4962, "step": 104120 }, { "epoch": 0.8613971956818464, "grad_norm": 616.1210327148438, "learning_rate": 7.463613796275921e-07, "loss": 104.9887, "step": 104130 }, { "epoch": 0.8614799189312157, "grad_norm": 676.0572509765625, "learning_rate": 7.456202505613252e-07, "loss": 79.0803, "step": 104140 }, { "epoch": 0.8615626421805849, "grad_norm": 776.3839111328125, "learning_rate": 7.448794599965286e-07, "loss": 73.2689, "step": 104150 }, { "epoch": 0.8616453654299541, "grad_norm": 814.0975341796875, "learning_rate": 7.441390079921463e-07, "loss": 71.7772, "step": 104160 }, { "epoch": 0.8617280886793234, "grad_norm": 962.8041381835938, "learning_rate": 7.433988946070913e-07, "loss": 76.7952, "step": 104170 }, { "epoch": 0.8618108119286926, "grad_norm": 901.0029296875, "learning_rate": 7.426591199002514e-07, "loss": 142.9842, "step": 104180 }, { "epoch": 0.8618935351780618, "grad_norm": 527.9906005859375, "learning_rate": 7.419196839304865e-07, "loss": 70.0582, "step": 104190 }, { "epoch": 0.861976258427431, "grad_norm": 820.9356079101562, "learning_rate": 7.411805867566319e-07, "loss": 69.29, "step": 104200 }, { "epoch": 0.8620589816768003, "grad_norm": 788.0999145507812, "learning_rate": 7.404418284374909e-07, "loss": 103.0304, "step": 104210 }, { "epoch": 0.8621417049261695, "grad_norm": 770.0556640625, "learning_rate": 7.397034090318455e-07, "loss": 82.4049, "step": 104220 }, { "epoch": 0.8622244281755387, "grad_norm": 595.1284790039062, "learning_rate": 7.389653285984471e-07, "loss": 77.7128, "step": 104230 }, { "epoch": 0.862307151424908, "grad_norm": 597.3800048828125, "learning_rate": 7.382275871960215e-07, "loss": 73.3765, "step": 104240 }, { "epoch": 0.8623898746742772, "grad_norm": 1349.270263671875, "learning_rate": 7.374901848832683e-07, "loss": 98.555, "step": 104250 }, { "epoch": 0.8624725979236464, "grad_norm": 962.1214599609375, "learning_rate": 7.367531217188595e-07, "loss": 66.6915, "step": 104260 }, { "epoch": 0.8625553211730157, "grad_norm": 757.8329467773438, "learning_rate": 7.360163977614388e-07, "loss": 112.147, "step": 104270 }, { "epoch": 0.8626380444223849, "grad_norm": 1464.066650390625, "learning_rate": 7.352800130696253e-07, "loss": 84.9319, "step": 104280 }, { "epoch": 0.8627207676717541, "grad_norm": 862.40478515625, "learning_rate": 7.345439677020077e-07, "loss": 94.3516, "step": 104290 }, { "epoch": 0.8628034909211234, "grad_norm": 282.1973876953125, "learning_rate": 7.33808261717151e-07, "loss": 82.4753, "step": 104300 }, { "epoch": 0.8628862141704926, "grad_norm": 608.2290649414062, "learning_rate": 7.330728951735916e-07, "loss": 79.4648, "step": 104310 }, { "epoch": 0.8629689374198618, "grad_norm": 794.7872924804688, "learning_rate": 7.323378681298394e-07, "loss": 76.8241, "step": 104320 }, { "epoch": 0.8630516606692311, "grad_norm": 795.65380859375, "learning_rate": 7.316031806443774e-07, "loss": 68.2953, "step": 104330 }, { "epoch": 0.8631343839186003, "grad_norm": 975.4625244140625, "learning_rate": 7.308688327756591e-07, "loss": 89.0946, "step": 104340 }, { "epoch": 0.8632171071679695, "grad_norm": 435.83013916015625, "learning_rate": 7.301348245821172e-07, "loss": 83.3241, "step": 104350 }, { "epoch": 0.8632998304173388, "grad_norm": 991.8388061523438, "learning_rate": 7.294011561221503e-07, "loss": 82.6674, "step": 104360 }, { "epoch": 0.863382553666708, "grad_norm": 756.9240112304688, "learning_rate": 7.286678274541358e-07, "loss": 61.5385, "step": 104370 }, { "epoch": 0.8634652769160772, "grad_norm": 836.5737915039062, "learning_rate": 7.279348386364182e-07, "loss": 88.6052, "step": 104380 }, { "epoch": 0.8635480001654465, "grad_norm": 563.5185546875, "learning_rate": 7.272021897273196e-07, "loss": 77.5748, "step": 104390 }, { "epoch": 0.8636307234148157, "grad_norm": 952.6004638671875, "learning_rate": 7.264698807851328e-07, "loss": 80.778, "step": 104400 }, { "epoch": 0.8637134466641849, "grad_norm": 484.9126281738281, "learning_rate": 7.257379118681251e-07, "loss": 74.0515, "step": 104410 }, { "epoch": 0.8637961699135542, "grad_norm": 680.9312133789062, "learning_rate": 7.250062830345356e-07, "loss": 96.0891, "step": 104420 }, { "epoch": 0.8638788931629234, "grad_norm": 759.776123046875, "learning_rate": 7.242749943425765e-07, "loss": 110.3812, "step": 104430 }, { "epoch": 0.8639616164122926, "grad_norm": 1156.90380859375, "learning_rate": 7.235440458504317e-07, "loss": 98.4978, "step": 104440 }, { "epoch": 0.8640443396616619, "grad_norm": 910.2420654296875, "learning_rate": 7.228134376162632e-07, "loss": 121.0294, "step": 104450 }, { "epoch": 0.8641270629110311, "grad_norm": 1492.252197265625, "learning_rate": 7.22083169698199e-07, "loss": 107.3472, "step": 104460 }, { "epoch": 0.8642097861604003, "grad_norm": 1519.6361083984375, "learning_rate": 7.21353242154344e-07, "loss": 108.2701, "step": 104470 }, { "epoch": 0.8642925094097696, "grad_norm": 1137.7952880859375, "learning_rate": 7.206236550427747e-07, "loss": 70.6361, "step": 104480 }, { "epoch": 0.8643752326591388, "grad_norm": 538.6994018554688, "learning_rate": 7.198944084215421e-07, "loss": 59.6236, "step": 104490 }, { "epoch": 0.864457955908508, "grad_norm": 511.9731140136719, "learning_rate": 7.191655023486682e-07, "loss": 84.1222, "step": 104500 }, { "epoch": 0.8645406791578774, "grad_norm": 897.7769775390625, "learning_rate": 7.184369368821486e-07, "loss": 84.8233, "step": 104510 }, { "epoch": 0.8646234024072466, "grad_norm": 597.1415405273438, "learning_rate": 7.177087120799525e-07, "loss": 68.9195, "step": 104520 }, { "epoch": 0.8647061256566158, "grad_norm": 973.8169555664062, "learning_rate": 7.169808280000213e-07, "loss": 130.055, "step": 104530 }, { "epoch": 0.8647888489059851, "grad_norm": 729.9190673828125, "learning_rate": 7.16253284700269e-07, "loss": 54.8949, "step": 104540 }, { "epoch": 0.8648715721553543, "grad_norm": 1125.672119140625, "learning_rate": 7.155260822385828e-07, "loss": 103.3253, "step": 104550 }, { "epoch": 0.8649542954047235, "grad_norm": 1073.4981689453125, "learning_rate": 7.147992206728238e-07, "loss": 69.4759, "step": 104560 }, { "epoch": 0.8650370186540928, "grad_norm": 947.5101318359375, "learning_rate": 7.140727000608239e-07, "loss": 90.7275, "step": 104570 }, { "epoch": 0.865119741903462, "grad_norm": 874.85302734375, "learning_rate": 7.133465204603895e-07, "loss": 91.6719, "step": 104580 }, { "epoch": 0.8652024651528312, "grad_norm": 994.0018310546875, "learning_rate": 7.126206819292997e-07, "loss": 79.112, "step": 104590 }, { "epoch": 0.8652851884022005, "grad_norm": 875.634521484375, "learning_rate": 7.118951845253053e-07, "loss": 75.0624, "step": 104600 }, { "epoch": 0.8653679116515697, "grad_norm": 905.0895385742188, "learning_rate": 7.111700283061318e-07, "loss": 60.7013, "step": 104610 }, { "epoch": 0.8654506349009389, "grad_norm": 526.9947509765625, "learning_rate": 7.104452133294759e-07, "loss": 88.9498, "step": 104620 }, { "epoch": 0.8655333581503082, "grad_norm": 724.978515625, "learning_rate": 7.097207396530081e-07, "loss": 81.7083, "step": 104630 }, { "epoch": 0.8656160813996774, "grad_norm": 626.593017578125, "learning_rate": 7.089966073343712e-07, "loss": 86.5442, "step": 104640 }, { "epoch": 0.8656988046490466, "grad_norm": 718.5762329101562, "learning_rate": 7.082728164311814e-07, "loss": 77.1169, "step": 104650 }, { "epoch": 0.8657815278984159, "grad_norm": 908.7042236328125, "learning_rate": 7.07549367001027e-07, "loss": 97.2458, "step": 104660 }, { "epoch": 0.8658642511477851, "grad_norm": 863.9970092773438, "learning_rate": 7.068262591014696e-07, "loss": 97.336, "step": 104670 }, { "epoch": 0.8659469743971543, "grad_norm": 558.906494140625, "learning_rate": 7.06103492790044e-07, "loss": 70.4578, "step": 104680 }, { "epoch": 0.8660296976465236, "grad_norm": 552.6804809570312, "learning_rate": 7.053810681242573e-07, "loss": 70.8659, "step": 104690 }, { "epoch": 0.8661124208958928, "grad_norm": 508.5573425292969, "learning_rate": 7.046589851615893e-07, "loss": 64.113, "step": 104700 }, { "epoch": 0.866195144145262, "grad_norm": 806.9329833984375, "learning_rate": 7.039372439594927e-07, "loss": 76.6169, "step": 104710 }, { "epoch": 0.8662778673946313, "grad_norm": 811.2073974609375, "learning_rate": 7.032158445753934e-07, "loss": 89.2829, "step": 104720 }, { "epoch": 0.8663605906440005, "grad_norm": 810.32421875, "learning_rate": 7.024947870666899e-07, "loss": 82.497, "step": 104730 }, { "epoch": 0.8664433138933697, "grad_norm": 758.9779052734375, "learning_rate": 7.017740714907534e-07, "loss": 82.4294, "step": 104740 }, { "epoch": 0.866526037142739, "grad_norm": 794.656982421875, "learning_rate": 7.010536979049277e-07, "loss": 96.2241, "step": 104750 }, { "epoch": 0.8666087603921082, "grad_norm": 494.71063232421875, "learning_rate": 7.003336663665294e-07, "loss": 83.7569, "step": 104760 }, { "epoch": 0.8666914836414774, "grad_norm": 880.26318359375, "learning_rate": 6.996139769328492e-07, "loss": 84.9818, "step": 104770 }, { "epoch": 0.8667742068908467, "grad_norm": 766.883544921875, "learning_rate": 6.988946296611482e-07, "loss": 80.0834, "step": 104780 }, { "epoch": 0.8668569301402159, "grad_norm": 1070.8145751953125, "learning_rate": 6.981756246086623e-07, "loss": 81.7685, "step": 104790 }, { "epoch": 0.8669396533895851, "grad_norm": 951.6563110351562, "learning_rate": 6.974569618325993e-07, "loss": 87.9841, "step": 104800 }, { "epoch": 0.8670223766389544, "grad_norm": 1062.390869140625, "learning_rate": 6.967386413901395e-07, "loss": 88.5198, "step": 104810 }, { "epoch": 0.8671050998883236, "grad_norm": 774.4631958007812, "learning_rate": 6.96020663338437e-07, "loss": 91.2737, "step": 104820 }, { "epoch": 0.8671878231376928, "grad_norm": 432.4591979980469, "learning_rate": 6.953030277346179e-07, "loss": 77.8616, "step": 104830 }, { "epoch": 0.8672705463870621, "grad_norm": 691.8195190429688, "learning_rate": 6.945857346357804e-07, "loss": 122.5289, "step": 104840 }, { "epoch": 0.8673532696364313, "grad_norm": 952.2196044921875, "learning_rate": 6.938687840989972e-07, "loss": 63.169, "step": 104850 }, { "epoch": 0.8674359928858005, "grad_norm": 0.0, "learning_rate": 6.931521761813126e-07, "loss": 88.435, "step": 104860 }, { "epoch": 0.8675187161351698, "grad_norm": 733.5023193359375, "learning_rate": 6.924359109397433e-07, "loss": 74.1316, "step": 104870 }, { "epoch": 0.867601439384539, "grad_norm": 641.714111328125, "learning_rate": 6.917199884312809e-07, "loss": 88.4582, "step": 104880 }, { "epoch": 0.8676841626339082, "grad_norm": 790.0752563476562, "learning_rate": 6.910044087128848e-07, "loss": 84.1604, "step": 104890 }, { "epoch": 0.8677668858832776, "grad_norm": 663.8312377929688, "learning_rate": 6.902891718414916e-07, "loss": 88.1632, "step": 104900 }, { "epoch": 0.8678496091326467, "grad_norm": 591.2030639648438, "learning_rate": 6.895742778740117e-07, "loss": 81.408, "step": 104910 }, { "epoch": 0.867932332382016, "grad_norm": 624.845458984375, "learning_rate": 6.888597268673236e-07, "loss": 62.2073, "step": 104920 }, { "epoch": 0.8680150556313851, "grad_norm": 794.0993041992188, "learning_rate": 6.881455188782821e-07, "loss": 77.9982, "step": 104930 }, { "epoch": 0.8680977788807545, "grad_norm": 666.7411499023438, "learning_rate": 6.874316539637127e-07, "loss": 97.2312, "step": 104940 }, { "epoch": 0.8681805021301237, "grad_norm": 1049.414306640625, "learning_rate": 6.867181321804145e-07, "loss": 65.9971, "step": 104950 }, { "epoch": 0.8682632253794929, "grad_norm": 1004.761474609375, "learning_rate": 6.860049535851593e-07, "loss": 101.2232, "step": 104960 }, { "epoch": 0.8683459486288622, "grad_norm": 1056.7117919921875, "learning_rate": 6.852921182346927e-07, "loss": 89.3601, "step": 104970 }, { "epoch": 0.8684286718782314, "grad_norm": 801.506103515625, "learning_rate": 6.84579626185729e-07, "loss": 115.8205, "step": 104980 }, { "epoch": 0.8685113951276006, "grad_norm": 475.1383056640625, "learning_rate": 6.838674774949594e-07, "loss": 71.0523, "step": 104990 }, { "epoch": 0.8685941183769699, "grad_norm": 2474.7802734375, "learning_rate": 6.831556722190453e-07, "loss": 73.0823, "step": 105000 }, { "epoch": 0.8686768416263391, "grad_norm": 897.792724609375, "learning_rate": 6.82444210414624e-07, "loss": 100.9971, "step": 105010 }, { "epoch": 0.8687595648757083, "grad_norm": 886.6505737304688, "learning_rate": 6.817330921383014e-07, "loss": 82.7632, "step": 105020 }, { "epoch": 0.8688422881250776, "grad_norm": 768.843505859375, "learning_rate": 6.81022317446659e-07, "loss": 80.1767, "step": 105030 }, { "epoch": 0.8689250113744468, "grad_norm": 497.87469482421875, "learning_rate": 6.803118863962488e-07, "loss": 81.78, "step": 105040 }, { "epoch": 0.869007734623816, "grad_norm": 491.0223083496094, "learning_rate": 6.796017990435977e-07, "loss": 47.6318, "step": 105050 }, { "epoch": 0.8690904578731853, "grad_norm": 710.4462890625, "learning_rate": 6.788920554452044e-07, "loss": 94.524, "step": 105060 }, { "epoch": 0.8691731811225545, "grad_norm": 816.4830932617188, "learning_rate": 6.781826556575377e-07, "loss": 103.3359, "step": 105070 }, { "epoch": 0.8692559043719237, "grad_norm": 654.7847290039062, "learning_rate": 6.77473599737043e-07, "loss": 90.9469, "step": 105080 }, { "epoch": 0.869338627621293, "grad_norm": 850.1752319335938, "learning_rate": 6.767648877401361e-07, "loss": 75.9913, "step": 105090 }, { "epoch": 0.8694213508706622, "grad_norm": 577.8858032226562, "learning_rate": 6.76056519723205e-07, "loss": 108.0124, "step": 105100 }, { "epoch": 0.8695040741200314, "grad_norm": 1150.85205078125, "learning_rate": 6.753484957426132e-07, "loss": 108.4332, "step": 105110 }, { "epoch": 0.8695867973694007, "grad_norm": 380.7832336425781, "learning_rate": 6.746408158546947e-07, "loss": 58.2194, "step": 105120 }, { "epoch": 0.8696695206187699, "grad_norm": 683.9119873046875, "learning_rate": 6.739334801157554e-07, "loss": 94.9094, "step": 105130 }, { "epoch": 0.8697522438681391, "grad_norm": 695.6708984375, "learning_rate": 6.732264885820761e-07, "loss": 90.9071, "step": 105140 }, { "epoch": 0.8698349671175084, "grad_norm": 586.7554321289062, "learning_rate": 6.725198413099071e-07, "loss": 68.9317, "step": 105150 }, { "epoch": 0.8699176903668776, "grad_norm": 791.099609375, "learning_rate": 6.718135383554736e-07, "loss": 86.9691, "step": 105160 }, { "epoch": 0.8700004136162468, "grad_norm": 785.0557250976562, "learning_rate": 6.711075797749733e-07, "loss": 89.6558, "step": 105170 }, { "epoch": 0.8700831368656161, "grad_norm": 899.1886596679688, "learning_rate": 6.704019656245764e-07, "loss": 93.6414, "step": 105180 }, { "epoch": 0.8701658601149853, "grad_norm": 802.5755615234375, "learning_rate": 6.696966959604234e-07, "loss": 108.0222, "step": 105190 }, { "epoch": 0.8702485833643545, "grad_norm": 1072.0621337890625, "learning_rate": 6.689917708386317e-07, "loss": 87.7261, "step": 105200 }, { "epoch": 0.8703313066137238, "grad_norm": 519.7999267578125, "learning_rate": 6.682871903152888e-07, "loss": 99.5766, "step": 105210 }, { "epoch": 0.870414029863093, "grad_norm": 964.565673828125, "learning_rate": 6.675829544464535e-07, "loss": 81.1996, "step": 105220 }, { "epoch": 0.8704967531124622, "grad_norm": 692.7098388671875, "learning_rate": 6.668790632881611e-07, "loss": 78.7892, "step": 105230 }, { "epoch": 0.8705794763618315, "grad_norm": 1007.2482299804688, "learning_rate": 6.66175516896414e-07, "loss": 140.3374, "step": 105240 }, { "epoch": 0.8706621996112007, "grad_norm": 1265.0731201171875, "learning_rate": 6.654723153271913e-07, "loss": 86.9678, "step": 105250 }, { "epoch": 0.8707449228605699, "grad_norm": 632.7286376953125, "learning_rate": 6.64769458636444e-07, "loss": 93.2092, "step": 105260 }, { "epoch": 0.8708276461099392, "grad_norm": 836.0169067382812, "learning_rate": 6.640669468800947e-07, "loss": 92.5722, "step": 105270 }, { "epoch": 0.8709103693593084, "grad_norm": 658.7850952148438, "learning_rate": 6.633647801140391e-07, "loss": 100.487, "step": 105280 }, { "epoch": 0.8709930926086776, "grad_norm": 587.75146484375, "learning_rate": 6.626629583941447e-07, "loss": 68.9679, "step": 105290 }, { "epoch": 0.8710758158580469, "grad_norm": 817.0744018554688, "learning_rate": 6.619614817762537e-07, "loss": 73.4202, "step": 105300 }, { "epoch": 0.8711585391074161, "grad_norm": 1041.8173828125, "learning_rate": 6.612603503161802e-07, "loss": 64.3145, "step": 105310 }, { "epoch": 0.8712412623567853, "grad_norm": 1404.5458984375, "learning_rate": 6.605595640697071e-07, "loss": 99.786, "step": 105320 }, { "epoch": 0.8713239856061546, "grad_norm": 771.7532958984375, "learning_rate": 6.598591230925943e-07, "loss": 82.4178, "step": 105330 }, { "epoch": 0.8714067088555238, "grad_norm": 751.2391357421875, "learning_rate": 6.591590274405723e-07, "loss": 122.4575, "step": 105340 }, { "epoch": 0.871489432104893, "grad_norm": 911.8988037109375, "learning_rate": 6.584592771693449e-07, "loss": 81.5654, "step": 105350 }, { "epoch": 0.8715721553542624, "grad_norm": 1637.146484375, "learning_rate": 6.57759872334588e-07, "loss": 97.9183, "step": 105360 }, { "epoch": 0.8716548786036316, "grad_norm": 329.8784484863281, "learning_rate": 6.570608129919492e-07, "loss": 62.8334, "step": 105370 }, { "epoch": 0.8717376018530008, "grad_norm": 1938.16455078125, "learning_rate": 6.563620991970509e-07, "loss": 76.7647, "step": 105380 }, { "epoch": 0.8718203251023701, "grad_norm": 717.9798583984375, "learning_rate": 6.556637310054842e-07, "loss": 82.9026, "step": 105390 }, { "epoch": 0.8719030483517393, "grad_norm": 1446.15966796875, "learning_rate": 6.54965708472819e-07, "loss": 78.1929, "step": 105400 }, { "epoch": 0.8719857716011085, "grad_norm": 1189.690673828125, "learning_rate": 6.542680316545902e-07, "loss": 72.0123, "step": 105410 }, { "epoch": 0.8720684948504778, "grad_norm": 613.6715698242188, "learning_rate": 6.535707006063097e-07, "loss": 61.0173, "step": 105420 }, { "epoch": 0.872151218099847, "grad_norm": 1527.14990234375, "learning_rate": 6.528737153834613e-07, "loss": 130.8081, "step": 105430 }, { "epoch": 0.8722339413492162, "grad_norm": 1041.12109375, "learning_rate": 6.521770760415008e-07, "loss": 93.4483, "step": 105440 }, { "epoch": 0.8723166645985855, "grad_norm": 852.52197265625, "learning_rate": 6.514807826358566e-07, "loss": 90.9408, "step": 105450 }, { "epoch": 0.8723993878479547, "grad_norm": 647.1378173828125, "learning_rate": 6.507848352219299e-07, "loss": 67.9603, "step": 105460 }, { "epoch": 0.8724821110973239, "grad_norm": 1192.1534423828125, "learning_rate": 6.500892338550929e-07, "loss": 111.3489, "step": 105470 }, { "epoch": 0.8725648343466932, "grad_norm": 814.5704345703125, "learning_rate": 6.493939785906928e-07, "loss": 87.0574, "step": 105480 }, { "epoch": 0.8726475575960624, "grad_norm": 741.8197021484375, "learning_rate": 6.486990694840467e-07, "loss": 68.0563, "step": 105490 }, { "epoch": 0.8727302808454316, "grad_norm": 948.8922729492188, "learning_rate": 6.480045065904461e-07, "loss": 89.7507, "step": 105500 }, { "epoch": 0.8728130040948009, "grad_norm": 1155.1693115234375, "learning_rate": 6.47310289965154e-07, "loss": 81.7769, "step": 105510 }, { "epoch": 0.8728957273441701, "grad_norm": 919.3316040039062, "learning_rate": 6.466164196634056e-07, "loss": 97.3234, "step": 105520 }, { "epoch": 0.8729784505935393, "grad_norm": 987.2399291992188, "learning_rate": 6.459228957404101e-07, "loss": 97.3726, "step": 105530 }, { "epoch": 0.8730611738429086, "grad_norm": 838.5607299804688, "learning_rate": 6.452297182513468e-07, "loss": 69.4488, "step": 105540 }, { "epoch": 0.8731438970922778, "grad_norm": 1150.2685546875, "learning_rate": 6.445368872513691e-07, "loss": 72.3736, "step": 105550 }, { "epoch": 0.873226620341647, "grad_norm": 687.7527465820312, "learning_rate": 6.438444027956026e-07, "loss": 68.8998, "step": 105560 }, { "epoch": 0.8733093435910163, "grad_norm": 455.3255920410156, "learning_rate": 6.431522649391447e-07, "loss": 94.7914, "step": 105570 }, { "epoch": 0.8733920668403855, "grad_norm": 599.0716552734375, "learning_rate": 6.42460473737066e-07, "loss": 77.5691, "step": 105580 }, { "epoch": 0.8734747900897547, "grad_norm": 982.269775390625, "learning_rate": 6.417690292444084e-07, "loss": 88.5389, "step": 105590 }, { "epoch": 0.873557513339124, "grad_norm": 304.9704895019531, "learning_rate": 6.410779315161885e-07, "loss": 74.6791, "step": 105600 }, { "epoch": 0.8736402365884932, "grad_norm": 840.72021484375, "learning_rate": 6.403871806073924e-07, "loss": 66.5112, "step": 105610 }, { "epoch": 0.8737229598378624, "grad_norm": 1154.091552734375, "learning_rate": 6.396967765729806e-07, "loss": 92.7399, "step": 105620 }, { "epoch": 0.8738056830872317, "grad_norm": 875.4635009765625, "learning_rate": 6.390067194678851e-07, "loss": 72.3126, "step": 105630 }, { "epoch": 0.8738884063366009, "grad_norm": 776.973876953125, "learning_rate": 6.383170093470103e-07, "loss": 86.9083, "step": 105640 }, { "epoch": 0.8739711295859701, "grad_norm": 1360.440673828125, "learning_rate": 6.376276462652342e-07, "loss": 85.9056, "step": 105650 }, { "epoch": 0.8740538528353393, "grad_norm": 1393.611572265625, "learning_rate": 6.36938630277405e-07, "loss": 97.7015, "step": 105660 }, { "epoch": 0.8741365760847086, "grad_norm": 762.5498046875, "learning_rate": 6.36249961438346e-07, "loss": 111.3497, "step": 105670 }, { "epoch": 0.8742192993340778, "grad_norm": 832.800048828125, "learning_rate": 6.355616398028502e-07, "loss": 81.6632, "step": 105680 }, { "epoch": 0.874302022583447, "grad_norm": 661.8202514648438, "learning_rate": 6.348736654256848e-07, "loss": 86.8958, "step": 105690 }, { "epoch": 0.8743847458328163, "grad_norm": 882.3600463867188, "learning_rate": 6.341860383615889e-07, "loss": 83.2556, "step": 105700 }, { "epoch": 0.8744674690821855, "grad_norm": 867.7281494140625, "learning_rate": 6.33498758665273e-07, "loss": 94.6405, "step": 105710 }, { "epoch": 0.8745501923315547, "grad_norm": 554.966796875, "learning_rate": 6.328118263914218e-07, "loss": 82.4325, "step": 105720 }, { "epoch": 0.874632915580924, "grad_norm": 895.0468139648438, "learning_rate": 6.321252415946904e-07, "loss": 65.7037, "step": 105730 }, { "epoch": 0.8747156388302932, "grad_norm": 812.9765014648438, "learning_rate": 6.314390043297092e-07, "loss": 88.4549, "step": 105740 }, { "epoch": 0.8747983620796624, "grad_norm": 640.9434204101562, "learning_rate": 6.307531146510754e-07, "loss": 86.2904, "step": 105750 }, { "epoch": 0.8748810853290317, "grad_norm": 553.9396362304688, "learning_rate": 6.300675726133648e-07, "loss": 75.9555, "step": 105760 }, { "epoch": 0.874963808578401, "grad_norm": 419.89013671875, "learning_rate": 6.293823782711222e-07, "loss": 118.8286, "step": 105770 }, { "epoch": 0.8750465318277701, "grad_norm": 733.7764892578125, "learning_rate": 6.286975316788657e-07, "loss": 60.0976, "step": 105780 }, { "epoch": 0.8751292550771395, "grad_norm": 1776.192138671875, "learning_rate": 6.280130328910849e-07, "loss": 106.2316, "step": 105790 }, { "epoch": 0.8752119783265087, "grad_norm": 659.7675170898438, "learning_rate": 6.27328881962242e-07, "loss": 86.0116, "step": 105800 }, { "epoch": 0.8752947015758779, "grad_norm": 1190.6595458984375, "learning_rate": 6.266450789467727e-07, "loss": 82.2913, "step": 105810 }, { "epoch": 0.8753774248252472, "grad_norm": 1189.8983154296875, "learning_rate": 6.259616238990828e-07, "loss": 50.2323, "step": 105820 }, { "epoch": 0.8754601480746164, "grad_norm": 1355.994873046875, "learning_rate": 6.252785168735537e-07, "loss": 82.7705, "step": 105830 }, { "epoch": 0.8755428713239856, "grad_norm": 752.8018798828125, "learning_rate": 6.245957579245349e-07, "loss": 67.2084, "step": 105840 }, { "epoch": 0.8756255945733549, "grad_norm": 1385.89306640625, "learning_rate": 6.239133471063502e-07, "loss": 72.485, "step": 105850 }, { "epoch": 0.8757083178227241, "grad_norm": 467.3131103515625, "learning_rate": 6.23231284473298e-07, "loss": 92.7437, "step": 105860 }, { "epoch": 0.8757910410720933, "grad_norm": 937.4329833984375, "learning_rate": 6.225495700796452e-07, "loss": 98.6699, "step": 105870 }, { "epoch": 0.8758737643214626, "grad_norm": 530.5836791992188, "learning_rate": 6.218682039796343e-07, "loss": 79.1359, "step": 105880 }, { "epoch": 0.8759564875708318, "grad_norm": 652.8727416992188, "learning_rate": 6.211871862274765e-07, "loss": 91.6557, "step": 105890 }, { "epoch": 0.876039210820201, "grad_norm": 818.7814331054688, "learning_rate": 6.205065168773589e-07, "loss": 101.5155, "step": 105900 }, { "epoch": 0.8761219340695703, "grad_norm": 876.2640380859375, "learning_rate": 6.198261959834384e-07, "loss": 103.2004, "step": 105910 }, { "epoch": 0.8762046573189395, "grad_norm": 516.7633056640625, "learning_rate": 6.191462235998463e-07, "loss": 77.5895, "step": 105920 }, { "epoch": 0.8762873805683087, "grad_norm": 646.3327026367188, "learning_rate": 6.184665997806832e-07, "loss": 56.7936, "step": 105930 }, { "epoch": 0.876370103817678, "grad_norm": 576.1641235351562, "learning_rate": 6.177873245800237e-07, "loss": 70.1931, "step": 105940 }, { "epoch": 0.8764528270670472, "grad_norm": 1594.127685546875, "learning_rate": 6.171083980519138e-07, "loss": 94.1285, "step": 105950 }, { "epoch": 0.8765355503164164, "grad_norm": 1294.6268310546875, "learning_rate": 6.164298202503754e-07, "loss": 101.6075, "step": 105960 }, { "epoch": 0.8766182735657857, "grad_norm": 540.323974609375, "learning_rate": 6.157515912293982e-07, "loss": 87.1123, "step": 105970 }, { "epoch": 0.8767009968151549, "grad_norm": 570.9243774414062, "learning_rate": 6.150737110429461e-07, "loss": 74.8245, "step": 105980 }, { "epoch": 0.8767837200645241, "grad_norm": 906.1620483398438, "learning_rate": 6.143961797449549e-07, "loss": 71.5885, "step": 105990 }, { "epoch": 0.8768664433138934, "grad_norm": 878.2374267578125, "learning_rate": 6.137189973893331e-07, "loss": 73.5436, "step": 106000 }, { "epoch": 0.8769491665632626, "grad_norm": 873.0944213867188, "learning_rate": 6.130421640299594e-07, "loss": 61.5041, "step": 106010 }, { "epoch": 0.8770318898126318, "grad_norm": 734.4179077148438, "learning_rate": 6.123656797206873e-07, "loss": 85.497, "step": 106020 }, { "epoch": 0.8771146130620011, "grad_norm": 1206.48193359375, "learning_rate": 6.116895445153415e-07, "loss": 61.9635, "step": 106030 }, { "epoch": 0.8771973363113703, "grad_norm": 976.5135498046875, "learning_rate": 6.11013758467719e-07, "loss": 101.1229, "step": 106040 }, { "epoch": 0.8772800595607395, "grad_norm": 1087.355224609375, "learning_rate": 6.103383216315883e-07, "loss": 75.2539, "step": 106050 }, { "epoch": 0.8773627828101088, "grad_norm": 674.3126220703125, "learning_rate": 6.096632340606922e-07, "loss": 77.9221, "step": 106060 }, { "epoch": 0.877445506059478, "grad_norm": 753.2930297851562, "learning_rate": 6.089884958087439e-07, "loss": 87.5172, "step": 106070 }, { "epoch": 0.8775282293088472, "grad_norm": 640.8326416015625, "learning_rate": 6.083141069294285e-07, "loss": 74.8427, "step": 106080 }, { "epoch": 0.8776109525582165, "grad_norm": 772.4714965820312, "learning_rate": 6.07640067476406e-07, "loss": 132.3394, "step": 106090 }, { "epoch": 0.8776936758075857, "grad_norm": 566.04248046875, "learning_rate": 6.069663775033041e-07, "loss": 65.6599, "step": 106100 }, { "epoch": 0.8777763990569549, "grad_norm": 780.9393920898438, "learning_rate": 6.06293037063726e-07, "loss": 80.8205, "step": 106110 }, { "epoch": 0.8778591223063242, "grad_norm": 654.2213745117188, "learning_rate": 6.056200462112466e-07, "loss": 98.3166, "step": 106120 }, { "epoch": 0.8779418455556934, "grad_norm": 1010.9708862304688, "learning_rate": 6.049474049994125e-07, "loss": 77.5668, "step": 106130 }, { "epoch": 0.8780245688050626, "grad_norm": 871.8776245117188, "learning_rate": 6.042751134817431e-07, "loss": 87.4441, "step": 106140 }, { "epoch": 0.8781072920544319, "grad_norm": 801.5313110351562, "learning_rate": 6.03603171711728e-07, "loss": 70.5738, "step": 106150 }, { "epoch": 0.8781900153038011, "grad_norm": 602.4784545898438, "learning_rate": 6.029315797428331e-07, "loss": 87.0834, "step": 106160 }, { "epoch": 0.8782727385531703, "grad_norm": 798.4912109375, "learning_rate": 6.02260337628493e-07, "loss": 63.067, "step": 106170 }, { "epoch": 0.8783554618025396, "grad_norm": 1035.9962158203125, "learning_rate": 6.015894454221143e-07, "loss": 102.9226, "step": 106180 }, { "epoch": 0.8784381850519088, "grad_norm": 891.0628051757812, "learning_rate": 6.00918903177078e-07, "loss": 80.5977, "step": 106190 }, { "epoch": 0.878520908301278, "grad_norm": 597.5098876953125, "learning_rate": 6.002487109467347e-07, "loss": 72.3878, "step": 106200 }, { "epoch": 0.8786036315506474, "grad_norm": 920.417236328125, "learning_rate": 5.995788687844095e-07, "loss": 87.8151, "step": 106210 }, { "epoch": 0.8786863548000166, "grad_norm": 683.3025512695312, "learning_rate": 5.98909376743399e-07, "loss": 95.9658, "step": 106220 }, { "epoch": 0.8787690780493858, "grad_norm": 583.1339111328125, "learning_rate": 5.982402348769706e-07, "loss": 85.8737, "step": 106230 }, { "epoch": 0.8788518012987551, "grad_norm": 894.7290649414062, "learning_rate": 5.975714432383645e-07, "loss": 148.6901, "step": 106240 }, { "epoch": 0.8789345245481243, "grad_norm": 651.498291015625, "learning_rate": 5.969030018807953e-07, "loss": 69.2491, "step": 106250 }, { "epoch": 0.8790172477974935, "grad_norm": 544.6094360351562, "learning_rate": 5.962349108574478e-07, "loss": 72.5222, "step": 106260 }, { "epoch": 0.8790999710468628, "grad_norm": 1018.2208251953125, "learning_rate": 5.955671702214765e-07, "loss": 109.5223, "step": 106270 }, { "epoch": 0.879182694296232, "grad_norm": 892.130615234375, "learning_rate": 5.948997800260125e-07, "loss": 109.8219, "step": 106280 }, { "epoch": 0.8792654175456012, "grad_norm": 1923.7149658203125, "learning_rate": 5.94232740324156e-07, "loss": 79.6006, "step": 106290 }, { "epoch": 0.8793481407949705, "grad_norm": 582.2818603515625, "learning_rate": 5.935660511689805e-07, "loss": 97.3011, "step": 106300 }, { "epoch": 0.8794308640443397, "grad_norm": 1395.069580078125, "learning_rate": 5.928997126135317e-07, "loss": 101.4692, "step": 106310 }, { "epoch": 0.8795135872937089, "grad_norm": 628.81298828125, "learning_rate": 5.922337247108267e-07, "loss": 88.9791, "step": 106320 }, { "epoch": 0.8795963105430782, "grad_norm": 1247.3292236328125, "learning_rate": 5.915680875138558e-07, "loss": 81.8968, "step": 106330 }, { "epoch": 0.8796790337924474, "grad_norm": 532.4841918945312, "learning_rate": 5.909028010755786e-07, "loss": 105.6303, "step": 106340 }, { "epoch": 0.8797617570418166, "grad_norm": 748.1610717773438, "learning_rate": 5.902378654489327e-07, "loss": 93.8092, "step": 106350 }, { "epoch": 0.8798444802911859, "grad_norm": 1042.5313720703125, "learning_rate": 5.89573280686821e-07, "loss": 82.0542, "step": 106360 }, { "epoch": 0.8799272035405551, "grad_norm": 775.1285400390625, "learning_rate": 5.889090468421216e-07, "loss": 109.767, "step": 106370 }, { "epoch": 0.8800099267899243, "grad_norm": 807.12744140625, "learning_rate": 5.882451639676856e-07, "loss": 85.814, "step": 106380 }, { "epoch": 0.8800926500392935, "grad_norm": 712.1463012695312, "learning_rate": 5.875816321163346e-07, "loss": 80.7074, "step": 106390 }, { "epoch": 0.8801753732886628, "grad_norm": 937.6873168945312, "learning_rate": 5.869184513408633e-07, "loss": 99.54, "step": 106400 }, { "epoch": 0.880258096538032, "grad_norm": 647.0883178710938, "learning_rate": 5.862556216940368e-07, "loss": 66.8527, "step": 106410 }, { "epoch": 0.8803408197874012, "grad_norm": 748.40673828125, "learning_rate": 5.85593143228595e-07, "loss": 90.8509, "step": 106420 }, { "epoch": 0.8804235430367705, "grad_norm": 1141.7353515625, "learning_rate": 5.849310159972466e-07, "loss": 66.829, "step": 106430 }, { "epoch": 0.8805062662861397, "grad_norm": 880.968994140625, "learning_rate": 5.84269240052675e-07, "loss": 92.991, "step": 106440 }, { "epoch": 0.8805889895355089, "grad_norm": 803.4313354492188, "learning_rate": 5.836078154475349e-07, "loss": 126.6737, "step": 106450 }, { "epoch": 0.8806717127848782, "grad_norm": 478.593994140625, "learning_rate": 5.82946742234452e-07, "loss": 79.7044, "step": 106460 }, { "epoch": 0.8807544360342474, "grad_norm": 715.75146484375, "learning_rate": 5.822860204660253e-07, "loss": 70.8862, "step": 106470 }, { "epoch": 0.8808371592836166, "grad_norm": 524.4168090820312, "learning_rate": 5.816256501948259e-07, "loss": 90.5394, "step": 106480 }, { "epoch": 0.8809198825329859, "grad_norm": 752.4254150390625, "learning_rate": 5.809656314733953e-07, "loss": 86.0278, "step": 106490 }, { "epoch": 0.8810026057823551, "grad_norm": 623.67529296875, "learning_rate": 5.803059643542491e-07, "loss": 67.2863, "step": 106500 }, { "epoch": 0.8810853290317243, "grad_norm": 909.5820922851562, "learning_rate": 5.796466488898733e-07, "loss": 94.9997, "step": 106510 }, { "epoch": 0.8811680522810936, "grad_norm": 1068.1527099609375, "learning_rate": 5.789876851327275e-07, "loss": 110.8501, "step": 106520 }, { "epoch": 0.8812507755304628, "grad_norm": 2067.63671875, "learning_rate": 5.783290731352415e-07, "loss": 74.2425, "step": 106530 }, { "epoch": 0.881333498779832, "grad_norm": 889.1475830078125, "learning_rate": 5.776708129498188e-07, "loss": 105.7229, "step": 106540 }, { "epoch": 0.8814162220292013, "grad_norm": 828.7091674804688, "learning_rate": 5.770129046288331e-07, "loss": 111.1305, "step": 106550 }, { "epoch": 0.8814989452785705, "grad_norm": 997.931640625, "learning_rate": 5.76355348224632e-07, "loss": 110.2826, "step": 106560 }, { "epoch": 0.8815816685279397, "grad_norm": 1100.512451171875, "learning_rate": 5.756981437895342e-07, "loss": 103.0764, "step": 106570 }, { "epoch": 0.881664391777309, "grad_norm": 890.1248779296875, "learning_rate": 5.750412913758307e-07, "loss": 87.1903, "step": 106580 }, { "epoch": 0.8817471150266782, "grad_norm": 1222.4423828125, "learning_rate": 5.743847910357836e-07, "loss": 80.1742, "step": 106590 }, { "epoch": 0.8818298382760474, "grad_norm": 637.3331298828125, "learning_rate": 5.737286428216288e-07, "loss": 63.0052, "step": 106600 }, { "epoch": 0.8819125615254167, "grad_norm": 919.8414306640625, "learning_rate": 5.730728467855695e-07, "loss": 85.3765, "step": 106610 }, { "epoch": 0.881995284774786, "grad_norm": 661.7064208984375, "learning_rate": 5.724174029797886e-07, "loss": 64.6699, "step": 106620 }, { "epoch": 0.8820780080241551, "grad_norm": 944.8833618164062, "learning_rate": 5.717623114564347e-07, "loss": 80.0257, "step": 106630 }, { "epoch": 0.8821607312735245, "grad_norm": 992.529296875, "learning_rate": 5.711075722676312e-07, "loss": 102.9055, "step": 106640 }, { "epoch": 0.8822434545228937, "grad_norm": 691.0933227539062, "learning_rate": 5.704531854654721e-07, "loss": 75.375, "step": 106650 }, { "epoch": 0.8823261777722629, "grad_norm": 519.4947509765625, "learning_rate": 5.69799151102024e-07, "loss": 76.9458, "step": 106660 }, { "epoch": 0.8824089010216322, "grad_norm": 3443.64794921875, "learning_rate": 5.691454692293258e-07, "loss": 99.6053, "step": 106670 }, { "epoch": 0.8824916242710014, "grad_norm": 666.6217041015625, "learning_rate": 5.684921398993875e-07, "loss": 58.0222, "step": 106680 }, { "epoch": 0.8825743475203706, "grad_norm": 765.2488403320312, "learning_rate": 5.678391631641933e-07, "loss": 69.1236, "step": 106690 }, { "epoch": 0.8826570707697399, "grad_norm": 797.377197265625, "learning_rate": 5.671865390756948e-07, "loss": 73.226, "step": 106700 }, { "epoch": 0.8827397940191091, "grad_norm": 577.6158447265625, "learning_rate": 5.665342676858182e-07, "loss": 68.9044, "step": 106710 }, { "epoch": 0.8828225172684783, "grad_norm": 503.404541015625, "learning_rate": 5.658823490464638e-07, "loss": 60.142, "step": 106720 }, { "epoch": 0.8829052405178476, "grad_norm": 959.497314453125, "learning_rate": 5.652307832095016e-07, "loss": 98.6422, "step": 106730 }, { "epoch": 0.8829879637672168, "grad_norm": 983.735595703125, "learning_rate": 5.645795702267731e-07, "loss": 76.1658, "step": 106740 }, { "epoch": 0.883070687016586, "grad_norm": 817.6513671875, "learning_rate": 5.639287101500923e-07, "loss": 102.0763, "step": 106750 }, { "epoch": 0.8831534102659553, "grad_norm": 574.2067260742188, "learning_rate": 5.63278203031245e-07, "loss": 80.8163, "step": 106760 }, { "epoch": 0.8832361335153245, "grad_norm": 823.816650390625, "learning_rate": 5.626280489219893e-07, "loss": 62.7599, "step": 106770 }, { "epoch": 0.8833188567646937, "grad_norm": 1204.035888671875, "learning_rate": 5.619782478740565e-07, "loss": 77.9808, "step": 106780 }, { "epoch": 0.883401580014063, "grad_norm": 1444.1622314453125, "learning_rate": 5.613287999391453e-07, "loss": 104.0911, "step": 106790 }, { "epoch": 0.8834843032634322, "grad_norm": 976.5982666015625, "learning_rate": 5.606797051689294e-07, "loss": 66.8928, "step": 106800 }, { "epoch": 0.8835670265128014, "grad_norm": 785.950927734375, "learning_rate": 5.600309636150575e-07, "loss": 96.6358, "step": 106810 }, { "epoch": 0.8836497497621707, "grad_norm": 736.114990234375, "learning_rate": 5.593825753291443e-07, "loss": 73.0294, "step": 106820 }, { "epoch": 0.8837324730115399, "grad_norm": 567.788330078125, "learning_rate": 5.587345403627803e-07, "loss": 88.5388, "step": 106830 }, { "epoch": 0.8838151962609091, "grad_norm": 1305.841064453125, "learning_rate": 5.580868587675265e-07, "loss": 75.9461, "step": 106840 }, { "epoch": 0.8838979195102784, "grad_norm": 732.5675048828125, "learning_rate": 5.574395305949148e-07, "loss": 97.3696, "step": 106850 }, { "epoch": 0.8839806427596476, "grad_norm": 1100.3834228515625, "learning_rate": 5.567925558964532e-07, "loss": 80.1282, "step": 106860 }, { "epoch": 0.8840633660090168, "grad_norm": 1010.8685913085938, "learning_rate": 5.561459347236148e-07, "loss": 90.0034, "step": 106870 }, { "epoch": 0.8841460892583861, "grad_norm": 371.01641845703125, "learning_rate": 5.554996671278495e-07, "loss": 85.3001, "step": 106880 }, { "epoch": 0.8842288125077553, "grad_norm": 580.9638671875, "learning_rate": 5.548537531605785e-07, "loss": 85.6992, "step": 106890 }, { "epoch": 0.8843115357571245, "grad_norm": 764.029052734375, "learning_rate": 5.542081928731929e-07, "loss": 57.3023, "step": 106900 }, { "epoch": 0.8843942590064938, "grad_norm": 610.1265258789062, "learning_rate": 5.535629863170594e-07, "loss": 80.0151, "step": 106910 }, { "epoch": 0.884476982255863, "grad_norm": 615.563720703125, "learning_rate": 5.529181335435124e-07, "loss": 81.8557, "step": 106920 }, { "epoch": 0.8845597055052322, "grad_norm": 1012.834716796875, "learning_rate": 5.522736346038598e-07, "loss": 95.8173, "step": 106930 }, { "epoch": 0.8846424287546015, "grad_norm": 1021.8489990234375, "learning_rate": 5.516294895493824e-07, "loss": 78.0766, "step": 106940 }, { "epoch": 0.8847251520039707, "grad_norm": 827.3118286132812, "learning_rate": 5.509856984313316e-07, "loss": 80.5865, "step": 106950 }, { "epoch": 0.8848078752533399, "grad_norm": 990.0963745117188, "learning_rate": 5.503422613009296e-07, "loss": 69.4347, "step": 106960 }, { "epoch": 0.8848905985027092, "grad_norm": 593.1627197265625, "learning_rate": 5.49699178209373e-07, "loss": 66.5936, "step": 106970 }, { "epoch": 0.8849733217520784, "grad_norm": 835.3819580078125, "learning_rate": 5.490564492078287e-07, "loss": 93.1555, "step": 106980 }, { "epoch": 0.8850560450014476, "grad_norm": 934.9071044921875, "learning_rate": 5.484140743474359e-07, "loss": 92.1782, "step": 106990 }, { "epoch": 0.8851387682508169, "grad_norm": 756.7869873046875, "learning_rate": 5.477720536793035e-07, "loss": 87.1874, "step": 107000 }, { "epoch": 0.8852214915001861, "grad_norm": 781.40771484375, "learning_rate": 5.471303872545175e-07, "loss": 97.5913, "step": 107010 }, { "epoch": 0.8853042147495553, "grad_norm": 991.211181640625, "learning_rate": 5.464890751241303e-07, "loss": 74.9666, "step": 107020 }, { "epoch": 0.8853869379989247, "grad_norm": 1911.62109375, "learning_rate": 5.458481173391694e-07, "loss": 87.3059, "step": 107030 }, { "epoch": 0.8854696612482938, "grad_norm": 1125.0557861328125, "learning_rate": 5.452075139506314e-07, "loss": 97.0107, "step": 107040 }, { "epoch": 0.885552384497663, "grad_norm": 1039.9718017578125, "learning_rate": 5.445672650094863e-07, "loss": 114.9293, "step": 107050 }, { "epoch": 0.8856351077470324, "grad_norm": 1257.056396484375, "learning_rate": 5.43927370566677e-07, "loss": 100.9882, "step": 107060 }, { "epoch": 0.8857178309964016, "grad_norm": 506.9411926269531, "learning_rate": 5.432878306731154e-07, "loss": 72.05, "step": 107070 }, { "epoch": 0.8858005542457708, "grad_norm": 1244.661376953125, "learning_rate": 5.426486453796881e-07, "loss": 103.0482, "step": 107080 }, { "epoch": 0.8858832774951401, "grad_norm": 558.5795288085938, "learning_rate": 5.420098147372515e-07, "loss": 87.9579, "step": 107090 }, { "epoch": 0.8859660007445093, "grad_norm": 582.4910278320312, "learning_rate": 5.413713387966329e-07, "loss": 90.7297, "step": 107100 }, { "epoch": 0.8860487239938785, "grad_norm": 797.0523681640625, "learning_rate": 5.407332176086367e-07, "loss": 72.4927, "step": 107110 }, { "epoch": 0.8861314472432477, "grad_norm": 889.9099731445312, "learning_rate": 5.400954512240331e-07, "loss": 59.4978, "step": 107120 }, { "epoch": 0.886214170492617, "grad_norm": 705.6456909179688, "learning_rate": 5.394580396935656e-07, "loss": 71.2413, "step": 107130 }, { "epoch": 0.8862968937419862, "grad_norm": 1448.95751953125, "learning_rate": 5.388209830679508e-07, "loss": 86.1324, "step": 107140 }, { "epoch": 0.8863796169913554, "grad_norm": 619.7808227539062, "learning_rate": 5.381842813978766e-07, "loss": 88.696, "step": 107150 }, { "epoch": 0.8864623402407247, "grad_norm": 811.8013916015625, "learning_rate": 5.375479347340018e-07, "loss": 69.7523, "step": 107160 }, { "epoch": 0.8865450634900939, "grad_norm": 882.953369140625, "learning_rate": 5.369119431269582e-07, "loss": 73.939, "step": 107170 }, { "epoch": 0.8866277867394631, "grad_norm": 563.3268432617188, "learning_rate": 5.362763066273479e-07, "loss": 89.5063, "step": 107180 }, { "epoch": 0.8867105099888324, "grad_norm": 741.295166015625, "learning_rate": 5.356410252857458e-07, "loss": 136.3258, "step": 107190 }, { "epoch": 0.8867932332382016, "grad_norm": 918.1881103515625, "learning_rate": 5.350060991526996e-07, "loss": 87.6655, "step": 107200 }, { "epoch": 0.8868759564875708, "grad_norm": 571.861572265625, "learning_rate": 5.343715282787271e-07, "loss": 80.5383, "step": 107210 }, { "epoch": 0.8869586797369401, "grad_norm": 1086.863037109375, "learning_rate": 5.33737312714317e-07, "loss": 95.8323, "step": 107220 }, { "epoch": 0.8870414029863093, "grad_norm": 937.6774291992188, "learning_rate": 5.33103452509931e-07, "loss": 79.3969, "step": 107230 }, { "epoch": 0.8871241262356785, "grad_norm": 990.2754516601562, "learning_rate": 5.32469947716004e-07, "loss": 83.5339, "step": 107240 }, { "epoch": 0.8872068494850478, "grad_norm": 578.4818115234375, "learning_rate": 5.318367983829393e-07, "loss": 88.6761, "step": 107250 }, { "epoch": 0.887289572734417, "grad_norm": 930.7289428710938, "learning_rate": 5.312040045611144e-07, "loss": 108.0735, "step": 107260 }, { "epoch": 0.8873722959837862, "grad_norm": 776.857666015625, "learning_rate": 5.305715663008781e-07, "loss": 90.2431, "step": 107270 }, { "epoch": 0.8874550192331555, "grad_norm": 1201.76171875, "learning_rate": 5.299394836525507e-07, "loss": 78.6439, "step": 107280 }, { "epoch": 0.8875377424825247, "grad_norm": 609.2925415039062, "learning_rate": 5.293077566664234e-07, "loss": 98.0536, "step": 107290 }, { "epoch": 0.8876204657318939, "grad_norm": 1422.3968505859375, "learning_rate": 5.286763853927601e-07, "loss": 97.6561, "step": 107300 }, { "epoch": 0.8877031889812632, "grad_norm": 1007.32568359375, "learning_rate": 5.280453698817961e-07, "loss": 73.6183, "step": 107310 }, { "epoch": 0.8877859122306324, "grad_norm": 1175.0106201171875, "learning_rate": 5.27414710183739e-07, "loss": 96.213, "step": 107320 }, { "epoch": 0.8878686354800016, "grad_norm": 509.9411926269531, "learning_rate": 5.26784406348767e-07, "loss": 113.796, "step": 107330 }, { "epoch": 0.8879513587293709, "grad_norm": 646.4354858398438, "learning_rate": 5.261544584270301e-07, "loss": 102.4905, "step": 107340 }, { "epoch": 0.8880340819787401, "grad_norm": 795.3406372070312, "learning_rate": 5.255248664686507e-07, "loss": 91.493, "step": 107350 }, { "epoch": 0.8881168052281093, "grad_norm": 746.2446899414062, "learning_rate": 5.24895630523723e-07, "loss": 75.7493, "step": 107360 }, { "epoch": 0.8881995284774786, "grad_norm": 759.6536865234375, "learning_rate": 5.242667506423122e-07, "loss": 78.0097, "step": 107370 }, { "epoch": 0.8882822517268478, "grad_norm": 706.584228515625, "learning_rate": 5.236382268744544e-07, "loss": 65.1131, "step": 107380 }, { "epoch": 0.888364974976217, "grad_norm": 904.0101928710938, "learning_rate": 5.230100592701598e-07, "loss": 94.9032, "step": 107390 }, { "epoch": 0.8884476982255863, "grad_norm": 1153.80908203125, "learning_rate": 5.223822478794083e-07, "loss": 92.7698, "step": 107400 }, { "epoch": 0.8885304214749555, "grad_norm": 827.2999877929688, "learning_rate": 5.217547927521515e-07, "loss": 87.4861, "step": 107410 }, { "epoch": 0.8886131447243247, "grad_norm": 802.9961547851562, "learning_rate": 5.211276939383136e-07, "loss": 74.549, "step": 107420 }, { "epoch": 0.888695867973694, "grad_norm": 1088.9659423828125, "learning_rate": 5.205009514877895e-07, "loss": 94.3598, "step": 107430 }, { "epoch": 0.8887785912230632, "grad_norm": 792.8547973632812, "learning_rate": 5.198745654504472e-07, "loss": 89.176, "step": 107440 }, { "epoch": 0.8888613144724324, "grad_norm": 344.9277038574219, "learning_rate": 5.19248535876124e-07, "loss": 91.6492, "step": 107450 }, { "epoch": 0.8889440377218017, "grad_norm": 0.0, "learning_rate": 5.186228628146317e-07, "loss": 69.1638, "step": 107460 }, { "epoch": 0.889026760971171, "grad_norm": 1069.0447998046875, "learning_rate": 5.179975463157511e-07, "loss": 114.1318, "step": 107470 }, { "epoch": 0.8891094842205401, "grad_norm": 491.0845642089844, "learning_rate": 5.173725864292356e-07, "loss": 81.6012, "step": 107480 }, { "epoch": 0.8891922074699095, "grad_norm": 585.45654296875, "learning_rate": 5.167479832048117e-07, "loss": 101.9051, "step": 107490 }, { "epoch": 0.8892749307192787, "grad_norm": 1258.58984375, "learning_rate": 5.16123736692175e-07, "loss": 72.9357, "step": 107500 }, { "epoch": 0.8893576539686479, "grad_norm": 1067.1220703125, "learning_rate": 5.154998469409945e-07, "loss": 106.5081, "step": 107510 }, { "epoch": 0.8894403772180172, "grad_norm": 908.0804443359375, "learning_rate": 5.1487631400091e-07, "loss": 80.5405, "step": 107520 }, { "epoch": 0.8895231004673864, "grad_norm": 528.5298461914062, "learning_rate": 5.142531379215338e-07, "loss": 68.822, "step": 107530 }, { "epoch": 0.8896058237167556, "grad_norm": 997.2656860351562, "learning_rate": 5.136303187524478e-07, "loss": 88.9139, "step": 107540 }, { "epoch": 0.8896885469661249, "grad_norm": 1044.582275390625, "learning_rate": 5.130078565432089e-07, "loss": 82.6202, "step": 107550 }, { "epoch": 0.8897712702154941, "grad_norm": 683.0887451171875, "learning_rate": 5.123857513433406e-07, "loss": 77.2961, "step": 107560 }, { "epoch": 0.8898539934648633, "grad_norm": 307.61688232421875, "learning_rate": 5.117640032023436e-07, "loss": 82.64, "step": 107570 }, { "epoch": 0.8899367167142326, "grad_norm": 549.88720703125, "learning_rate": 5.111426121696866e-07, "loss": 84.3351, "step": 107580 }, { "epoch": 0.8900194399636018, "grad_norm": 685.7962646484375, "learning_rate": 5.105215782948108e-07, "loss": 86.4093, "step": 107590 }, { "epoch": 0.890102163212971, "grad_norm": 4017.52880859375, "learning_rate": 5.099009016271295e-07, "loss": 93.5784, "step": 107600 }, { "epoch": 0.8901848864623403, "grad_norm": 562.3822631835938, "learning_rate": 5.092805822160262e-07, "loss": 70.4941, "step": 107610 }, { "epoch": 0.8902676097117095, "grad_norm": 987.6434936523438, "learning_rate": 5.086606201108574e-07, "loss": 84.2584, "step": 107620 }, { "epoch": 0.8903503329610787, "grad_norm": 864.2491455078125, "learning_rate": 5.080410153609511e-07, "loss": 103.629, "step": 107630 }, { "epoch": 0.890433056210448, "grad_norm": 553.91064453125, "learning_rate": 5.074217680156062e-07, "loss": 72.658, "step": 107640 }, { "epoch": 0.8905157794598172, "grad_norm": 913.7005615234375, "learning_rate": 5.068028781240925e-07, "loss": 83.6153, "step": 107650 }, { "epoch": 0.8905985027091864, "grad_norm": 643.3348388671875, "learning_rate": 5.061843457356519e-07, "loss": 92.5445, "step": 107660 }, { "epoch": 0.8906812259585557, "grad_norm": 616.33984375, "learning_rate": 5.055661708994996e-07, "loss": 101.3816, "step": 107670 }, { "epoch": 0.8907639492079249, "grad_norm": 926.9981079101562, "learning_rate": 5.049483536648209e-07, "loss": 75.4115, "step": 107680 }, { "epoch": 0.8908466724572941, "grad_norm": 888.5517578125, "learning_rate": 5.043308940807717e-07, "loss": 90.7422, "step": 107690 }, { "epoch": 0.8909293957066634, "grad_norm": 884.9143676757812, "learning_rate": 5.037137921964814e-07, "loss": 93.778, "step": 107700 }, { "epoch": 0.8910121189560326, "grad_norm": 1310.1082763671875, "learning_rate": 5.030970480610492e-07, "loss": 92.942, "step": 107710 }, { "epoch": 0.8910948422054018, "grad_norm": 1047.660888671875, "learning_rate": 5.024806617235484e-07, "loss": 86.2382, "step": 107720 }, { "epoch": 0.8911775654547711, "grad_norm": 638.484619140625, "learning_rate": 5.01864633233019e-07, "loss": 103.3557, "step": 107730 }, { "epoch": 0.8912602887041403, "grad_norm": 1222.545654296875, "learning_rate": 5.01248962638477e-07, "loss": 81.7074, "step": 107740 }, { "epoch": 0.8913430119535095, "grad_norm": 648.8272705078125, "learning_rate": 5.006336499889075e-07, "loss": 87.4129, "step": 107750 }, { "epoch": 0.8914257352028788, "grad_norm": 611.2053833007812, "learning_rate": 5.000186953332709e-07, "loss": 77.1965, "step": 107760 }, { "epoch": 0.891508458452248, "grad_norm": 863.5994262695312, "learning_rate": 4.99404098720494e-07, "loss": 83.2366, "step": 107770 }, { "epoch": 0.8915911817016172, "grad_norm": 1112.973388671875, "learning_rate": 4.987898601994778e-07, "loss": 94.6965, "step": 107780 }, { "epoch": 0.8916739049509865, "grad_norm": 807.7054443359375, "learning_rate": 4.981759798190949e-07, "loss": 70.4171, "step": 107790 }, { "epoch": 0.8917566282003557, "grad_norm": 845.5135498046875, "learning_rate": 4.97562457628189e-07, "loss": 95.9467, "step": 107800 }, { "epoch": 0.8918393514497249, "grad_norm": 1642.2303466796875, "learning_rate": 4.969492936755759e-07, "loss": 71.8787, "step": 107810 }, { "epoch": 0.8919220746990941, "grad_norm": 796.374755859375, "learning_rate": 4.963364880100402e-07, "loss": 104.153, "step": 107820 }, { "epoch": 0.8920047979484634, "grad_norm": 420.7682800292969, "learning_rate": 4.957240406803409e-07, "loss": 78.5665, "step": 107830 }, { "epoch": 0.8920875211978326, "grad_norm": 785.2117309570312, "learning_rate": 4.951119517352082e-07, "loss": 84.3331, "step": 107840 }, { "epoch": 0.8921702444472018, "grad_norm": 341.3686828613281, "learning_rate": 4.945002212233412e-07, "loss": 83.6106, "step": 107850 }, { "epoch": 0.8922529676965711, "grad_norm": 785.5858154296875, "learning_rate": 4.938888491934158e-07, "loss": 72.6205, "step": 107860 }, { "epoch": 0.8923356909459403, "grad_norm": 1233.1529541015625, "learning_rate": 4.932778356940743e-07, "loss": 132.9701, "step": 107870 }, { "epoch": 0.8924184141953095, "grad_norm": 772.5987548828125, "learning_rate": 4.926671807739319e-07, "loss": 69.4721, "step": 107880 }, { "epoch": 0.8925011374446788, "grad_norm": 575.5115966796875, "learning_rate": 4.920568844815776e-07, "loss": 90.4714, "step": 107890 }, { "epoch": 0.892583860694048, "grad_norm": 895.2702026367188, "learning_rate": 4.914469468655675e-07, "loss": 64.3026, "step": 107900 }, { "epoch": 0.8926665839434172, "grad_norm": 973.0512084960938, "learning_rate": 4.908373679744316e-07, "loss": 69.2861, "step": 107910 }, { "epoch": 0.8927493071927866, "grad_norm": 948.7655639648438, "learning_rate": 4.902281478566728e-07, "loss": 86.1326, "step": 107920 }, { "epoch": 0.8928320304421558, "grad_norm": 656.8770751953125, "learning_rate": 4.896192865607629e-07, "loss": 78.8228, "step": 107930 }, { "epoch": 0.892914753691525, "grad_norm": 1080.7999267578125, "learning_rate": 4.890107841351466e-07, "loss": 86.0108, "step": 107940 }, { "epoch": 0.8929974769408943, "grad_norm": 1222.696533203125, "learning_rate": 4.884026406282383e-07, "loss": 82.2702, "step": 107950 }, { "epoch": 0.8930802001902635, "grad_norm": 1044.7401123046875, "learning_rate": 4.877948560884277e-07, "loss": 83.305, "step": 107960 }, { "epoch": 0.8931629234396327, "grad_norm": 748.3582153320312, "learning_rate": 4.871874305640723e-07, "loss": 77.1618, "step": 107970 }, { "epoch": 0.893245646689002, "grad_norm": 833.1705932617188, "learning_rate": 4.865803641035027e-07, "loss": 83.6409, "step": 107980 }, { "epoch": 0.8933283699383712, "grad_norm": 888.131591796875, "learning_rate": 4.859736567550188e-07, "loss": 99.5259, "step": 107990 }, { "epoch": 0.8934110931877404, "grad_norm": 908.0950927734375, "learning_rate": 4.853673085668947e-07, "loss": 103.253, "step": 108000 }, { "epoch": 0.8934938164371097, "grad_norm": 908.1394653320312, "learning_rate": 4.84761319587374e-07, "loss": 63.2051, "step": 108010 }, { "epoch": 0.8935765396864789, "grad_norm": 761.0054931640625, "learning_rate": 4.841556898646732e-07, "loss": 73.0463, "step": 108020 }, { "epoch": 0.8936592629358481, "grad_norm": 886.8889770507812, "learning_rate": 4.835504194469792e-07, "loss": 85.4193, "step": 108030 }, { "epoch": 0.8937419861852174, "grad_norm": 672.312744140625, "learning_rate": 4.829455083824508e-07, "loss": 91.8696, "step": 108040 }, { "epoch": 0.8938247094345866, "grad_norm": 700.14599609375, "learning_rate": 4.823409567192172e-07, "loss": 88.3726, "step": 108050 }, { "epoch": 0.8939074326839558, "grad_norm": 1457.9686279296875, "learning_rate": 4.817367645053806e-07, "loss": 94.6632, "step": 108060 }, { "epoch": 0.8939901559333251, "grad_norm": 1704.705810546875, "learning_rate": 4.811329317890151e-07, "loss": 94.3083, "step": 108070 }, { "epoch": 0.8940728791826943, "grad_norm": 720.1951293945312, "learning_rate": 4.805294586181624e-07, "loss": 87.1297, "step": 108080 }, { "epoch": 0.8941556024320635, "grad_norm": 581.7968139648438, "learning_rate": 4.799263450408386e-07, "loss": 66.5786, "step": 108090 }, { "epoch": 0.8942383256814328, "grad_norm": 1700.38232421875, "learning_rate": 4.79323591105032e-07, "loss": 110.1113, "step": 108100 }, { "epoch": 0.894321048930802, "grad_norm": 746.1917724609375, "learning_rate": 4.787211968586996e-07, "loss": 78.2092, "step": 108110 }, { "epoch": 0.8944037721801712, "grad_norm": 1086.206298828125, "learning_rate": 4.781191623497716e-07, "loss": 102.1627, "step": 108120 }, { "epoch": 0.8944864954295405, "grad_norm": 983.9944458007812, "learning_rate": 4.775174876261496e-07, "loss": 101.4622, "step": 108130 }, { "epoch": 0.8945692186789097, "grad_norm": 724.5037231445312, "learning_rate": 4.769161727357047e-07, "loss": 98.8229, "step": 108140 }, { "epoch": 0.8946519419282789, "grad_norm": 1058.3212890625, "learning_rate": 4.763152177262836e-07, "loss": 78.5956, "step": 108150 }, { "epoch": 0.8947346651776482, "grad_norm": 1093.287841796875, "learning_rate": 4.757146226456988e-07, "loss": 128.6798, "step": 108160 }, { "epoch": 0.8948173884270174, "grad_norm": 1089.8331298828125, "learning_rate": 4.75114387541738e-07, "loss": 102.0988, "step": 108170 }, { "epoch": 0.8949001116763866, "grad_norm": 221.10438537597656, "learning_rate": 4.7451451246215863e-07, "loss": 87.4969, "step": 108180 }, { "epoch": 0.8949828349257559, "grad_norm": 798.0653076171875, "learning_rate": 4.7391499745469026e-07, "loss": 77.1766, "step": 108190 }, { "epoch": 0.8950655581751251, "grad_norm": 767.3575439453125, "learning_rate": 4.733158425670342e-07, "loss": 78.9256, "step": 108200 }, { "epoch": 0.8951482814244943, "grad_norm": 1510.5670166015625, "learning_rate": 4.727170478468612e-07, "loss": 112.9961, "step": 108210 }, { "epoch": 0.8952310046738636, "grad_norm": 2023.6324462890625, "learning_rate": 4.721186133418154e-07, "loss": 107.9993, "step": 108220 }, { "epoch": 0.8953137279232328, "grad_norm": 434.2220458984375, "learning_rate": 4.71520539099512e-07, "loss": 86.038, "step": 108230 }, { "epoch": 0.895396451172602, "grad_norm": 890.50048828125, "learning_rate": 4.709228251675357e-07, "loss": 90.7458, "step": 108240 }, { "epoch": 0.8954791744219713, "grad_norm": 873.815673828125, "learning_rate": 4.7032547159344466e-07, "loss": 91.8802, "step": 108250 }, { "epoch": 0.8955618976713405, "grad_norm": 899.265869140625, "learning_rate": 4.6972847842476743e-07, "loss": 121.3053, "step": 108260 }, { "epoch": 0.8956446209207097, "grad_norm": 987.6566162109375, "learning_rate": 4.6913184570900436e-07, "loss": 125.955, "step": 108270 }, { "epoch": 0.895727344170079, "grad_norm": 746.0098876953125, "learning_rate": 4.685355734936264e-07, "loss": 93.6544, "step": 108280 }, { "epoch": 0.8958100674194482, "grad_norm": 577.643310546875, "learning_rate": 4.6793966182607564e-07, "loss": 85.8028, "step": 108290 }, { "epoch": 0.8958927906688174, "grad_norm": 748.9794921875, "learning_rate": 4.673441107537674e-07, "loss": 79.7184, "step": 108300 }, { "epoch": 0.8959755139181868, "grad_norm": 652.255126953125, "learning_rate": 4.6674892032408605e-07, "loss": 81.2928, "step": 108310 }, { "epoch": 0.896058237167556, "grad_norm": 1240.67822265625, "learning_rate": 4.661540905843881e-07, "loss": 94.0372, "step": 108320 }, { "epoch": 0.8961409604169251, "grad_norm": 807.7527465820312, "learning_rate": 4.655596215820013e-07, "loss": 78.2325, "step": 108330 }, { "epoch": 0.8962236836662945, "grad_norm": 633.8656616210938, "learning_rate": 4.649655133642256e-07, "loss": 96.0483, "step": 108340 }, { "epoch": 0.8963064069156637, "grad_norm": 622.1317749023438, "learning_rate": 4.643717659783309e-07, "loss": 88.281, "step": 108350 }, { "epoch": 0.8963891301650329, "grad_norm": 937.6145629882812, "learning_rate": 4.637783794715589e-07, "loss": 81.1072, "step": 108360 }, { "epoch": 0.8964718534144022, "grad_norm": 1903.2171630859375, "learning_rate": 4.6318535389112296e-07, "loss": 86.6768, "step": 108370 }, { "epoch": 0.8965545766637714, "grad_norm": 1076.609619140625, "learning_rate": 4.6259268928420753e-07, "loss": 98.4922, "step": 108380 }, { "epoch": 0.8966372999131406, "grad_norm": 897.179443359375, "learning_rate": 4.620003856979671e-07, "loss": 92.1115, "step": 108390 }, { "epoch": 0.8967200231625099, "grad_norm": 866.747314453125, "learning_rate": 4.6140844317953013e-07, "loss": 92.1406, "step": 108400 }, { "epoch": 0.8968027464118791, "grad_norm": 500.00457763671875, "learning_rate": 4.6081686177599395e-07, "loss": 66.3908, "step": 108410 }, { "epoch": 0.8968854696612483, "grad_norm": 782.8357543945312, "learning_rate": 4.602256415344275e-07, "loss": 64.6097, "step": 108420 }, { "epoch": 0.8969681929106176, "grad_norm": 1170.851318359375, "learning_rate": 4.5963478250187266e-07, "loss": 92.5552, "step": 108430 }, { "epoch": 0.8970509161599868, "grad_norm": 1039.9666748046875, "learning_rate": 4.5904428472534014e-07, "loss": 93.8967, "step": 108440 }, { "epoch": 0.897133639409356, "grad_norm": 638.94091796875, "learning_rate": 4.58454148251814e-07, "loss": 75.4576, "step": 108450 }, { "epoch": 0.8972163626587253, "grad_norm": 805.0771484375, "learning_rate": 4.578643731282484e-07, "loss": 77.5966, "step": 108460 }, { "epoch": 0.8972990859080945, "grad_norm": 896.1200561523438, "learning_rate": 4.5727495940156906e-07, "loss": 89.5274, "step": 108470 }, { "epoch": 0.8973818091574637, "grad_norm": 995.3990478515625, "learning_rate": 4.56685907118673e-07, "loss": 81.9692, "step": 108480 }, { "epoch": 0.897464532406833, "grad_norm": 967.8817749023438, "learning_rate": 4.560972163264282e-07, "loss": 85.0128, "step": 108490 }, { "epoch": 0.8975472556562022, "grad_norm": 1025.315185546875, "learning_rate": 4.5550888707167505e-07, "loss": 98.1567, "step": 108500 }, { "epoch": 0.8976299789055714, "grad_norm": 477.4637756347656, "learning_rate": 4.549209194012216e-07, "loss": 67.9016, "step": 108510 }, { "epoch": 0.8977127021549407, "grad_norm": 873.9866943359375, "learning_rate": 4.543333133618522e-07, "loss": 98.0505, "step": 108520 }, { "epoch": 0.8977954254043099, "grad_norm": 1049.532958984375, "learning_rate": 4.537460690003198e-07, "loss": 67.647, "step": 108530 }, { "epoch": 0.8978781486536791, "grad_norm": 1115.316162109375, "learning_rate": 4.531591863633478e-07, "loss": 82.5628, "step": 108540 }, { "epoch": 0.8979608719030483, "grad_norm": 532.7764892578125, "learning_rate": 4.5257266549763203e-07, "loss": 51.0, "step": 108550 }, { "epoch": 0.8980435951524176, "grad_norm": 729.6134033203125, "learning_rate": 4.5198650644983965e-07, "loss": 114.1188, "step": 108560 }, { "epoch": 0.8981263184017868, "grad_norm": 739.4089965820312, "learning_rate": 4.514007092666084e-07, "loss": 87.889, "step": 108570 }, { "epoch": 0.898209041651156, "grad_norm": 662.3689575195312, "learning_rate": 4.5081527399454814e-07, "loss": 102.5013, "step": 108580 }, { "epoch": 0.8982917649005253, "grad_norm": 909.9256591796875, "learning_rate": 4.502302006802378e-07, "loss": 82.6323, "step": 108590 }, { "epoch": 0.8983744881498945, "grad_norm": 909.2214965820312, "learning_rate": 4.496454893702301e-07, "loss": 78.7108, "step": 108600 }, { "epoch": 0.8984572113992637, "grad_norm": 580.1622314453125, "learning_rate": 4.4906114011104616e-07, "loss": 59.7331, "step": 108610 }, { "epoch": 0.898539934648633, "grad_norm": 1207.3009033203125, "learning_rate": 4.4847715294918215e-07, "loss": 84.35, "step": 108620 }, { "epoch": 0.8986226578980022, "grad_norm": 520.7291259765625, "learning_rate": 4.478935279311031e-07, "loss": 91.6451, "step": 108630 }, { "epoch": 0.8987053811473714, "grad_norm": 703.8441162109375, "learning_rate": 4.4731026510324406e-07, "loss": 97.7126, "step": 108640 }, { "epoch": 0.8987881043967407, "grad_norm": 707.9328002929688, "learning_rate": 4.4672736451201347e-07, "loss": 93.7411, "step": 108650 }, { "epoch": 0.8988708276461099, "grad_norm": 912.7407836914062, "learning_rate": 4.461448262037893e-07, "loss": 107.5078, "step": 108660 }, { "epoch": 0.8989535508954791, "grad_norm": 891.9956665039062, "learning_rate": 4.455626502249233e-07, "loss": 66.6602, "step": 108670 }, { "epoch": 0.8990362741448484, "grad_norm": 736.6570434570312, "learning_rate": 4.4498083662173396e-07, "loss": 52.5633, "step": 108680 }, { "epoch": 0.8991189973942176, "grad_norm": 798.45703125, "learning_rate": 4.443993854405154e-07, "loss": 95.3546, "step": 108690 }, { "epoch": 0.8992017206435868, "grad_norm": 643.7161865234375, "learning_rate": 4.4381829672752896e-07, "loss": 111.5154, "step": 108700 }, { "epoch": 0.8992844438929561, "grad_norm": 1088.9283447265625, "learning_rate": 4.4323757052901153e-07, "loss": 87.3449, "step": 108710 }, { "epoch": 0.8993671671423253, "grad_norm": 626.2893676757812, "learning_rate": 4.4265720689116776e-07, "loss": 87.1247, "step": 108720 }, { "epoch": 0.8994498903916945, "grad_norm": 608.726806640625, "learning_rate": 4.420772058601747e-07, "loss": 86.0277, "step": 108730 }, { "epoch": 0.8995326136410638, "grad_norm": 433.5987243652344, "learning_rate": 4.414975674821803e-07, "loss": 73.3416, "step": 108740 }, { "epoch": 0.899615336890433, "grad_norm": 1192.2998046875, "learning_rate": 4.4091829180330503e-07, "loss": 65.4931, "step": 108750 }, { "epoch": 0.8996980601398022, "grad_norm": 438.9178466796875, "learning_rate": 4.40339378869637e-07, "loss": 76.4078, "step": 108760 }, { "epoch": 0.8997807833891716, "grad_norm": 786.5850219726562, "learning_rate": 4.3976082872723814e-07, "loss": 68.2923, "step": 108770 }, { "epoch": 0.8998635066385408, "grad_norm": 1577.2021484375, "learning_rate": 4.3918264142214173e-07, "loss": 98.498, "step": 108780 }, { "epoch": 0.89994622988791, "grad_norm": 880.0924072265625, "learning_rate": 4.3860481700035096e-07, "loss": 84.358, "step": 108790 }, { "epoch": 0.9000289531372793, "grad_norm": 2280.10693359375, "learning_rate": 4.3802735550784014e-07, "loss": 75.876, "step": 108800 }, { "epoch": 0.9001116763866485, "grad_norm": 687.7626953125, "learning_rate": 4.37450256990557e-07, "loss": 121.7229, "step": 108810 }, { "epoch": 0.9001943996360177, "grad_norm": 1037.4595947265625, "learning_rate": 4.368735214944181e-07, "loss": 87.6477, "step": 108820 }, { "epoch": 0.900277122885387, "grad_norm": 5595.93359375, "learning_rate": 4.362971490653106e-07, "loss": 120.0495, "step": 108830 }, { "epoch": 0.9003598461347562, "grad_norm": 1015.2440795898438, "learning_rate": 4.357211397490951e-07, "loss": 81.7864, "step": 108840 }, { "epoch": 0.9004425693841254, "grad_norm": 632.1942749023438, "learning_rate": 4.351454935916011e-07, "loss": 83.9828, "step": 108850 }, { "epoch": 0.9005252926334947, "grad_norm": 1923.382568359375, "learning_rate": 4.3457021063862957e-07, "loss": 84.9657, "step": 108860 }, { "epoch": 0.9006080158828639, "grad_norm": 621.4093627929688, "learning_rate": 4.339952909359546e-07, "loss": 86.5398, "step": 108870 }, { "epoch": 0.9006907391322331, "grad_norm": 1436.72021484375, "learning_rate": 4.3342073452931845e-07, "loss": 89.1011, "step": 108880 }, { "epoch": 0.9007734623816024, "grad_norm": 388.8465881347656, "learning_rate": 4.328465414644373e-07, "loss": 75.2237, "step": 108890 }, { "epoch": 0.9008561856309716, "grad_norm": 778.0177612304688, "learning_rate": 4.322727117869951e-07, "loss": 99.6206, "step": 108900 }, { "epoch": 0.9009389088803408, "grad_norm": 582.366455078125, "learning_rate": 4.3169924554265165e-07, "loss": 82.2605, "step": 108910 }, { "epoch": 0.9010216321297101, "grad_norm": 1931.376953125, "learning_rate": 4.3112614277703304e-07, "loss": 106.5273, "step": 108920 }, { "epoch": 0.9011043553790793, "grad_norm": 899.9615478515625, "learning_rate": 4.3055340353574004e-07, "loss": 89.0014, "step": 108930 }, { "epoch": 0.9011870786284485, "grad_norm": 740.9810791015625, "learning_rate": 4.299810278643407e-07, "loss": 104.6826, "step": 108940 }, { "epoch": 0.9012698018778178, "grad_norm": 413.1753845214844, "learning_rate": 4.2940901580837744e-07, "loss": 94.5246, "step": 108950 }, { "epoch": 0.901352525127187, "grad_norm": 717.9979858398438, "learning_rate": 4.2883736741336277e-07, "loss": 57.4638, "step": 108960 }, { "epoch": 0.9014352483765562, "grad_norm": 962.7039184570312, "learning_rate": 4.282660827247803e-07, "loss": 105.8891, "step": 108970 }, { "epoch": 0.9015179716259255, "grad_norm": 647.7130737304688, "learning_rate": 4.2769516178808366e-07, "loss": 62.1071, "step": 108980 }, { "epoch": 0.9016006948752947, "grad_norm": 995.0176391601562, "learning_rate": 4.2712460464869934e-07, "loss": 63.1329, "step": 108990 }, { "epoch": 0.9016834181246639, "grad_norm": 1008.2059326171875, "learning_rate": 4.26554411352022e-07, "loss": 88.6648, "step": 109000 }, { "epoch": 0.9017661413740332, "grad_norm": 1263.0606689453125, "learning_rate": 4.259845819434233e-07, "loss": 86.4794, "step": 109010 }, { "epoch": 0.9018488646234024, "grad_norm": 1270.1148681640625, "learning_rate": 4.254151164682385e-07, "loss": 106.5339, "step": 109020 }, { "epoch": 0.9019315878727716, "grad_norm": 885.9868774414062, "learning_rate": 4.248460149717781e-07, "loss": 76.8595, "step": 109030 }, { "epoch": 0.9020143111221409, "grad_norm": 845.4027099609375, "learning_rate": 4.242772774993237e-07, "loss": 83.7632, "step": 109040 }, { "epoch": 0.9020970343715101, "grad_norm": 1098.0489501953125, "learning_rate": 4.237089040961262e-07, "loss": 64.5871, "step": 109050 }, { "epoch": 0.9021797576208793, "grad_norm": 846.4615478515625, "learning_rate": 4.2314089480740893e-07, "loss": 79.8472, "step": 109060 }, { "epoch": 0.9022624808702486, "grad_norm": 858.0891723632812, "learning_rate": 4.2257324967836575e-07, "loss": 93.2545, "step": 109070 }, { "epoch": 0.9023452041196178, "grad_norm": 1338.1405029296875, "learning_rate": 4.2200596875416165e-07, "loss": 91.2463, "step": 109080 }, { "epoch": 0.902427927368987, "grad_norm": 1132.8909912109375, "learning_rate": 4.2143905207993217e-07, "loss": 94.3591, "step": 109090 }, { "epoch": 0.9025106506183563, "grad_norm": 1090.1063232421875, "learning_rate": 4.2087249970078513e-07, "loss": 80.4746, "step": 109100 }, { "epoch": 0.9025933738677255, "grad_norm": 697.7597045898438, "learning_rate": 4.2030631166179727e-07, "loss": 69.0265, "step": 109110 }, { "epoch": 0.9026760971170947, "grad_norm": 925.8981323242188, "learning_rate": 4.197404880080186e-07, "loss": 97.9635, "step": 109120 }, { "epoch": 0.902758820366464, "grad_norm": 705.66552734375, "learning_rate": 4.191750287844687e-07, "loss": 96.6312, "step": 109130 }, { "epoch": 0.9028415436158332, "grad_norm": 497.7852478027344, "learning_rate": 4.186099340361383e-07, "loss": 87.5341, "step": 109140 }, { "epoch": 0.9029242668652024, "grad_norm": 537.1528930664062, "learning_rate": 4.180452038079902e-07, "loss": 82.3161, "step": 109150 }, { "epoch": 0.9030069901145718, "grad_norm": 627.737548828125, "learning_rate": 4.174808381449563e-07, "loss": 84.3565, "step": 109160 }, { "epoch": 0.903089713363941, "grad_norm": 743.4111328125, "learning_rate": 4.1691683709194184e-07, "loss": 62.4872, "step": 109170 }, { "epoch": 0.9031724366133101, "grad_norm": 784.0700073242188, "learning_rate": 4.163532006938209e-07, "loss": 76.4367, "step": 109180 }, { "epoch": 0.9032551598626795, "grad_norm": 770.558837890625, "learning_rate": 4.1578992899543926e-07, "loss": 90.6556, "step": 109190 }, { "epoch": 0.9033378831120487, "grad_norm": 586.0490112304688, "learning_rate": 4.1522702204161493e-07, "loss": 84.0449, "step": 109200 }, { "epoch": 0.9034206063614179, "grad_norm": 923.1376953125, "learning_rate": 4.146644798771349e-07, "loss": 118.2938, "step": 109210 }, { "epoch": 0.9035033296107872, "grad_norm": 749.9999389648438, "learning_rate": 4.141023025467583e-07, "loss": 104.1386, "step": 109220 }, { "epoch": 0.9035860528601564, "grad_norm": 1027.83349609375, "learning_rate": 4.1354049009521504e-07, "loss": 80.9892, "step": 109230 }, { "epoch": 0.9036687761095256, "grad_norm": 886.0511474609375, "learning_rate": 4.1297904256720646e-07, "loss": 76.1879, "step": 109240 }, { "epoch": 0.9037514993588949, "grad_norm": 880.5858764648438, "learning_rate": 4.1241796000740296e-07, "loss": 60.6476, "step": 109250 }, { "epoch": 0.9038342226082641, "grad_norm": 1270.9674072265625, "learning_rate": 4.118572424604489e-07, "loss": 69.0217, "step": 109260 }, { "epoch": 0.9039169458576333, "grad_norm": 513.490234375, "learning_rate": 4.112968899709574e-07, "loss": 70.7013, "step": 109270 }, { "epoch": 0.9039996691070025, "grad_norm": 1045.45654296875, "learning_rate": 4.1073690258351287e-07, "loss": 71.1828, "step": 109280 }, { "epoch": 0.9040823923563718, "grad_norm": 977.5629272460938, "learning_rate": 4.101772803426707e-07, "loss": 104.8837, "step": 109290 }, { "epoch": 0.904165115605741, "grad_norm": 690.1253662109375, "learning_rate": 4.0961802329295864e-07, "loss": 70.6142, "step": 109300 }, { "epoch": 0.9042478388551102, "grad_norm": 1407.6588134765625, "learning_rate": 4.090591314788728e-07, "loss": 83.4057, "step": 109310 }, { "epoch": 0.9043305621044795, "grad_norm": 595.9920654296875, "learning_rate": 4.085006049448825e-07, "loss": 56.3994, "step": 109320 }, { "epoch": 0.9044132853538487, "grad_norm": 965.2755126953125, "learning_rate": 4.0794244373542736e-07, "loss": 99.3913, "step": 109330 }, { "epoch": 0.9044960086032179, "grad_norm": 831.5714111328125, "learning_rate": 4.0738464789491673e-07, "loss": 81.9469, "step": 109340 }, { "epoch": 0.9045787318525872, "grad_norm": 581.9131469726562, "learning_rate": 4.0682721746773346e-07, "loss": 74.2238, "step": 109350 }, { "epoch": 0.9046614551019564, "grad_norm": 733.6002197265625, "learning_rate": 4.062701524982271e-07, "loss": 83.6481, "step": 109360 }, { "epoch": 0.9047441783513256, "grad_norm": 720.9714965820312, "learning_rate": 4.057134530307233e-07, "loss": 66.0066, "step": 109370 }, { "epoch": 0.9048269016006949, "grad_norm": 1211.7706298828125, "learning_rate": 4.0515711910951436e-07, "loss": 103.6463, "step": 109380 }, { "epoch": 0.9049096248500641, "grad_norm": 887.6861572265625, "learning_rate": 4.0460115077886665e-07, "loss": 74.1794, "step": 109390 }, { "epoch": 0.9049923480994333, "grad_norm": 725.7745361328125, "learning_rate": 4.0404554808301523e-07, "loss": 86.6203, "step": 109400 }, { "epoch": 0.9050750713488026, "grad_norm": 892.6699829101562, "learning_rate": 4.0349031106616697e-07, "loss": 83.9293, "step": 109410 }, { "epoch": 0.9051577945981718, "grad_norm": 1044.3946533203125, "learning_rate": 4.029354397724994e-07, "loss": 81.0904, "step": 109420 }, { "epoch": 0.905240517847541, "grad_norm": 426.7649230957031, "learning_rate": 4.023809342461615e-07, "loss": 81.0433, "step": 109430 }, { "epoch": 0.9053232410969103, "grad_norm": 915.0441284179688, "learning_rate": 4.0182679453127316e-07, "loss": 117.5908, "step": 109440 }, { "epoch": 0.9054059643462795, "grad_norm": 572.4617919921875, "learning_rate": 4.0127302067192285e-07, "loss": 74.3403, "step": 109450 }, { "epoch": 0.9054886875956487, "grad_norm": 1104.9822998046875, "learning_rate": 4.007196127121726e-07, "loss": 92.4061, "step": 109460 }, { "epoch": 0.905571410845018, "grad_norm": 1072.473876953125, "learning_rate": 4.001665706960556e-07, "loss": 92.3097, "step": 109470 }, { "epoch": 0.9056541340943872, "grad_norm": 716.0008544921875, "learning_rate": 3.996138946675737e-07, "loss": 90.4532, "step": 109480 }, { "epoch": 0.9057368573437564, "grad_norm": 509.80621337890625, "learning_rate": 3.9906158467070187e-07, "loss": 87.5559, "step": 109490 }, { "epoch": 0.9058195805931257, "grad_norm": 1042.047607421875, "learning_rate": 3.985096407493838e-07, "loss": 98.2946, "step": 109500 }, { "epoch": 0.9059023038424949, "grad_norm": 788.8461303710938, "learning_rate": 3.97958062947536e-07, "loss": 78.6691, "step": 109510 }, { "epoch": 0.9059850270918641, "grad_norm": 560.147216796875, "learning_rate": 3.9740685130904455e-07, "loss": 60.2149, "step": 109520 }, { "epoch": 0.9060677503412334, "grad_norm": 1752.591796875, "learning_rate": 3.9685600587776815e-07, "loss": 108.2573, "step": 109530 }, { "epoch": 0.9061504735906026, "grad_norm": 870.2755737304688, "learning_rate": 3.9630552669753243e-07, "loss": 70.9846, "step": 109540 }, { "epoch": 0.9062331968399718, "grad_norm": 1336.35400390625, "learning_rate": 3.9575541381213776e-07, "loss": 69.7969, "step": 109550 }, { "epoch": 0.9063159200893411, "grad_norm": 897.2123413085938, "learning_rate": 3.9520566726535367e-07, "loss": 82.9407, "step": 109560 }, { "epoch": 0.9063986433387103, "grad_norm": 1298.525390625, "learning_rate": 3.9465628710092185e-07, "loss": 114.8549, "step": 109570 }, { "epoch": 0.9064813665880795, "grad_norm": 1084.1280517578125, "learning_rate": 3.9410727336255396e-07, "loss": 84.6156, "step": 109580 }, { "epoch": 0.9065640898374488, "grad_norm": 580.8189697265625, "learning_rate": 3.935586260939322e-07, "loss": 58.7614, "step": 109590 }, { "epoch": 0.906646813086818, "grad_norm": 892.2899169921875, "learning_rate": 3.930103453387096e-07, "loss": 102.3395, "step": 109600 }, { "epoch": 0.9067295363361872, "grad_norm": 429.3028564453125, "learning_rate": 3.9246243114051174e-07, "loss": 77.4244, "step": 109610 }, { "epoch": 0.9068122595855566, "grad_norm": 469.5216979980469, "learning_rate": 3.919148835429315e-07, "loss": 64.1674, "step": 109620 }, { "epoch": 0.9068949828349258, "grad_norm": 779.257568359375, "learning_rate": 3.913677025895357e-07, "loss": 84.8305, "step": 109630 }, { "epoch": 0.906977706084295, "grad_norm": 1248.734375, "learning_rate": 3.9082088832386124e-07, "loss": 85.1336, "step": 109640 }, { "epoch": 0.9070604293336643, "grad_norm": 785.3364868164062, "learning_rate": 3.9027444078941435e-07, "loss": 110.6684, "step": 109650 }, { "epoch": 0.9071431525830335, "grad_norm": 1085.676513671875, "learning_rate": 3.897283600296753e-07, "loss": 77.48, "step": 109660 }, { "epoch": 0.9072258758324027, "grad_norm": 958.9440307617188, "learning_rate": 3.8918264608809207e-07, "loss": 78.6371, "step": 109670 }, { "epoch": 0.907308599081772, "grad_norm": 1247.623046875, "learning_rate": 3.886372990080856e-07, "loss": 88.1574, "step": 109680 }, { "epoch": 0.9073913223311412, "grad_norm": 860.1593017578125, "learning_rate": 3.8809231883304544e-07, "loss": 73.0786, "step": 109690 }, { "epoch": 0.9074740455805104, "grad_norm": 1131.9002685546875, "learning_rate": 3.875477056063343e-07, "loss": 99.7562, "step": 109700 }, { "epoch": 0.9075567688298797, "grad_norm": 578.1716918945312, "learning_rate": 3.8700345937128346e-07, "loss": 74.0123, "step": 109710 }, { "epoch": 0.9076394920792489, "grad_norm": 990.4799194335938, "learning_rate": 3.864595801711968e-07, "loss": 96.1826, "step": 109720 }, { "epoch": 0.9077222153286181, "grad_norm": 1159.52490234375, "learning_rate": 3.8591606804934733e-07, "loss": 113.0236, "step": 109730 }, { "epoch": 0.9078049385779874, "grad_norm": 732.7988891601562, "learning_rate": 3.853729230489811e-07, "loss": 95.0779, "step": 109740 }, { "epoch": 0.9078876618273566, "grad_norm": 947.6547241210938, "learning_rate": 3.8483014521331184e-07, "loss": 64.9452, "step": 109750 }, { "epoch": 0.9079703850767258, "grad_norm": 954.159912109375, "learning_rate": 3.8428773458552835e-07, "loss": 96.2663, "step": 109760 }, { "epoch": 0.9080531083260951, "grad_norm": 807.883056640625, "learning_rate": 3.837456912087867e-07, "loss": 105.7476, "step": 109770 }, { "epoch": 0.9081358315754643, "grad_norm": 719.7168579101562, "learning_rate": 3.8320401512621505e-07, "loss": 90.5569, "step": 109780 }, { "epoch": 0.9082185548248335, "grad_norm": 927.4055786132812, "learning_rate": 3.826627063809113e-07, "loss": 55.1157, "step": 109790 }, { "epoch": 0.9083012780742028, "grad_norm": 900.8541870117188, "learning_rate": 3.821217650159453e-07, "loss": 66.976, "step": 109800 }, { "epoch": 0.908384001323572, "grad_norm": 1139.78125, "learning_rate": 3.8158119107435667e-07, "loss": 83.4938, "step": 109810 }, { "epoch": 0.9084667245729412, "grad_norm": 928.9935302734375, "learning_rate": 3.810409845991575e-07, "loss": 81.5093, "step": 109820 }, { "epoch": 0.9085494478223105, "grad_norm": 866.55712890625, "learning_rate": 3.805011456333285e-07, "loss": 72.1152, "step": 109830 }, { "epoch": 0.9086321710716797, "grad_norm": 1261.871826171875, "learning_rate": 3.799616742198231e-07, "loss": 81.8657, "step": 109840 }, { "epoch": 0.9087148943210489, "grad_norm": 633.0125122070312, "learning_rate": 3.794225704015636e-07, "loss": 115.4469, "step": 109850 }, { "epoch": 0.9087976175704182, "grad_norm": 1088.3365478515625, "learning_rate": 3.7888383422144517e-07, "loss": 96.6127, "step": 109860 }, { "epoch": 0.9088803408197874, "grad_norm": 1705.669677734375, "learning_rate": 3.7834546572233287e-07, "loss": 90.1035, "step": 109870 }, { "epoch": 0.9089630640691566, "grad_norm": 1340.4810791015625, "learning_rate": 3.778074649470603e-07, "loss": 97.6526, "step": 109880 }, { "epoch": 0.9090457873185259, "grad_norm": 471.0425720214844, "learning_rate": 3.772698319384349e-07, "loss": 75.8749, "step": 109890 }, { "epoch": 0.9091285105678951, "grad_norm": 664.8509521484375, "learning_rate": 3.7673256673923356e-07, "loss": 78.4327, "step": 109900 }, { "epoch": 0.9092112338172643, "grad_norm": 1128.6776123046875, "learning_rate": 3.7619566939220363e-07, "loss": 109.4718, "step": 109910 }, { "epoch": 0.9092939570666336, "grad_norm": 811.8029174804688, "learning_rate": 3.7565913994006386e-07, "loss": 80.7976, "step": 109920 }, { "epoch": 0.9093766803160028, "grad_norm": 873.7833251953125, "learning_rate": 3.751229784255039e-07, "loss": 89.7391, "step": 109930 }, { "epoch": 0.909459403565372, "grad_norm": 1071.7000732421875, "learning_rate": 3.745871848911831e-07, "loss": 93.3303, "step": 109940 }, { "epoch": 0.9095421268147413, "grad_norm": 740.0640869140625, "learning_rate": 3.7405175937973103e-07, "loss": 81.3143, "step": 109950 }, { "epoch": 0.9096248500641105, "grad_norm": 1029.9522705078125, "learning_rate": 3.735167019337527e-07, "loss": 94.7903, "step": 109960 }, { "epoch": 0.9097075733134797, "grad_norm": 1091.040283203125, "learning_rate": 3.7298201259581615e-07, "loss": 78.4557, "step": 109970 }, { "epoch": 0.909790296562849, "grad_norm": 554.0980834960938, "learning_rate": 3.724476914084657e-07, "loss": 78.7385, "step": 109980 }, { "epoch": 0.9098730198122182, "grad_norm": 1428.4512939453125, "learning_rate": 3.719137384142152e-07, "loss": 104.3615, "step": 109990 }, { "epoch": 0.9099557430615874, "grad_norm": 995.0647583007812, "learning_rate": 3.7138015365554834e-07, "loss": 110.6832, "step": 110000 }, { "epoch": 0.9100384663109566, "grad_norm": 580.6385498046875, "learning_rate": 3.7084693717492016e-07, "loss": 56.6644, "step": 110010 }, { "epoch": 0.910121189560326, "grad_norm": 1886.46240234375, "learning_rate": 3.7031408901475605e-07, "loss": 68.3266, "step": 110020 }, { "epoch": 0.9102039128096951, "grad_norm": 573.3003540039062, "learning_rate": 3.6978160921745277e-07, "loss": 82.59, "step": 110030 }, { "epoch": 0.9102866360590643, "grad_norm": 722.798828125, "learning_rate": 3.692494978253769e-07, "loss": 111.005, "step": 110040 }, { "epoch": 0.9103693593084337, "grad_norm": 463.8318786621094, "learning_rate": 3.6871775488086624e-07, "loss": 60.8547, "step": 110050 }, { "epoch": 0.9104520825578029, "grad_norm": 783.125, "learning_rate": 3.681863804262292e-07, "loss": 91.4471, "step": 110060 }, { "epoch": 0.910534805807172, "grad_norm": 773.2374267578125, "learning_rate": 3.676553745037448e-07, "loss": 104.682, "step": 110070 }, { "epoch": 0.9106175290565414, "grad_norm": 917.3580322265625, "learning_rate": 3.671247371556624e-07, "loss": 94.0389, "step": 110080 }, { "epoch": 0.9107002523059106, "grad_norm": 892.2453002929688, "learning_rate": 3.665944684242029e-07, "loss": 98.2737, "step": 110090 }, { "epoch": 0.9107829755552798, "grad_norm": 978.7361450195312, "learning_rate": 3.660645683515568e-07, "loss": 78.8953, "step": 110100 }, { "epoch": 0.9108656988046491, "grad_norm": 538.931640625, "learning_rate": 3.65535036979886e-07, "loss": 89.924, "step": 110110 }, { "epoch": 0.9109484220540183, "grad_norm": 516.4479370117188, "learning_rate": 3.650058743513235e-07, "loss": 87.3976, "step": 110120 }, { "epoch": 0.9110311453033875, "grad_norm": 1158.6689453125, "learning_rate": 3.644770805079717e-07, "loss": 77.452, "step": 110130 }, { "epoch": 0.9111138685527568, "grad_norm": 1315.6121826171875, "learning_rate": 3.639486554919042e-07, "loss": 106.9289, "step": 110140 }, { "epoch": 0.911196591802126, "grad_norm": 816.3543701171875, "learning_rate": 3.634205993451656e-07, "loss": 82.4566, "step": 110150 }, { "epoch": 0.9112793150514952, "grad_norm": 838.74267578125, "learning_rate": 3.628929121097707e-07, "loss": 87.7295, "step": 110160 }, { "epoch": 0.9113620383008645, "grad_norm": 713.4924926757812, "learning_rate": 3.6236559382770597e-07, "loss": 71.8615, "step": 110170 }, { "epoch": 0.9114447615502337, "grad_norm": 1672.6297607421875, "learning_rate": 3.6183864454092664e-07, "loss": 99.8394, "step": 110180 }, { "epoch": 0.9115274847996029, "grad_norm": 982.43896484375, "learning_rate": 3.6131206429135977e-07, "loss": 80.5979, "step": 110190 }, { "epoch": 0.9116102080489722, "grad_norm": 1182.8560791015625, "learning_rate": 3.607858531209035e-07, "loss": 89.0724, "step": 110200 }, { "epoch": 0.9116929312983414, "grad_norm": 1181.8760986328125, "learning_rate": 3.6026001107142606e-07, "loss": 103.4322, "step": 110210 }, { "epoch": 0.9117756545477106, "grad_norm": 418.0647277832031, "learning_rate": 3.597345381847656e-07, "loss": 89.5779, "step": 110220 }, { "epoch": 0.9118583777970799, "grad_norm": 929.8257446289062, "learning_rate": 3.592094345027325e-07, "loss": 77.7989, "step": 110230 }, { "epoch": 0.9119411010464491, "grad_norm": 1179.2841796875, "learning_rate": 3.5868470006710564e-07, "loss": 121.445, "step": 110240 }, { "epoch": 0.9120238242958183, "grad_norm": 564.2163696289062, "learning_rate": 3.581603349196372e-07, "loss": 64.8469, "step": 110250 }, { "epoch": 0.9121065475451876, "grad_norm": 596.6292114257812, "learning_rate": 3.576363391020471e-07, "loss": 93.1684, "step": 110260 }, { "epoch": 0.9121892707945568, "grad_norm": 695.0462646484375, "learning_rate": 3.5711271265602807e-07, "loss": 77.7925, "step": 110270 }, { "epoch": 0.912271994043926, "grad_norm": 707.590087890625, "learning_rate": 3.56589455623243e-07, "loss": 71.3893, "step": 110280 }, { "epoch": 0.9123547172932953, "grad_norm": 0.0, "learning_rate": 3.56066568045324e-07, "loss": 64.9731, "step": 110290 }, { "epoch": 0.9124374405426645, "grad_norm": 654.4577026367188, "learning_rate": 3.555440499638768e-07, "loss": 80.6185, "step": 110300 }, { "epoch": 0.9125201637920337, "grad_norm": 691.3319702148438, "learning_rate": 3.55021901420472e-07, "loss": 61.5873, "step": 110310 }, { "epoch": 0.912602887041403, "grad_norm": 966.6505737304688, "learning_rate": 3.54500122456658e-07, "loss": 73.3954, "step": 110320 }, { "epoch": 0.9126856102907722, "grad_norm": 1062.859130859375, "learning_rate": 3.5397871311394937e-07, "loss": 88.4193, "step": 110330 }, { "epoch": 0.9127683335401414, "grad_norm": 1225.390625, "learning_rate": 3.534576734338324e-07, "loss": 85.1301, "step": 110340 }, { "epoch": 0.9128510567895107, "grad_norm": 604.2843017578125, "learning_rate": 3.529370034577634e-07, "loss": 74.7762, "step": 110350 }, { "epoch": 0.9129337800388799, "grad_norm": 659.66357421875, "learning_rate": 3.5241670322717025e-07, "loss": 113.1879, "step": 110360 }, { "epoch": 0.9130165032882491, "grad_norm": 746.4213256835938, "learning_rate": 3.518967727834499e-07, "loss": 81.8533, "step": 110370 }, { "epoch": 0.9130992265376184, "grad_norm": 654.5377807617188, "learning_rate": 3.513772121679715e-07, "loss": 65.3405, "step": 110380 }, { "epoch": 0.9131819497869876, "grad_norm": 734.1673583984375, "learning_rate": 3.508580214220753e-07, "loss": 102.2626, "step": 110390 }, { "epoch": 0.9132646730363568, "grad_norm": 512.5213012695312, "learning_rate": 3.503392005870687e-07, "loss": 81.4534, "step": 110400 }, { "epoch": 0.9133473962857261, "grad_norm": 852.9417114257812, "learning_rate": 3.498207497042316e-07, "loss": 85.8927, "step": 110410 }, { "epoch": 0.9134301195350953, "grad_norm": 632.1807250976562, "learning_rate": 3.4930266881481714e-07, "loss": 124.6005, "step": 110420 }, { "epoch": 0.9135128427844645, "grad_norm": 901.0861206054688, "learning_rate": 3.487849579600455e-07, "loss": 94.3462, "step": 110430 }, { "epoch": 0.9135955660338339, "grad_norm": 660.1860961914062, "learning_rate": 3.482676171811089e-07, "loss": 86.6812, "step": 110440 }, { "epoch": 0.913678289283203, "grad_norm": 1478.7293701171875, "learning_rate": 3.4775064651916877e-07, "loss": 68.7283, "step": 110450 }, { "epoch": 0.9137610125325722, "grad_norm": 927.5498657226562, "learning_rate": 3.472340460153595e-07, "loss": 72.09, "step": 110460 }, { "epoch": 0.9138437357819416, "grad_norm": 589.6502685546875, "learning_rate": 3.4671781571078424e-07, "loss": 97.7405, "step": 110470 }, { "epoch": 0.9139264590313108, "grad_norm": 614.3450927734375, "learning_rate": 3.462019556465157e-07, "loss": 126.026, "step": 110480 }, { "epoch": 0.91400918228068, "grad_norm": 394.06805419921875, "learning_rate": 3.4568646586359944e-07, "loss": 83.8727, "step": 110490 }, { "epoch": 0.9140919055300493, "grad_norm": 824.491455078125, "learning_rate": 3.4517134640305097e-07, "loss": 100.4316, "step": 110500 }, { "epoch": 0.9141746287794185, "grad_norm": 895.6577758789062, "learning_rate": 3.446565973058552e-07, "loss": 83.8836, "step": 110510 }, { "epoch": 0.9142573520287877, "grad_norm": 1477.3956298828125, "learning_rate": 3.441422186129689e-07, "loss": 109.6496, "step": 110520 }, { "epoch": 0.914340075278157, "grad_norm": 818.725830078125, "learning_rate": 3.4362821036531936e-07, "loss": 93.3673, "step": 110530 }, { "epoch": 0.9144227985275262, "grad_norm": 812.5512084960938, "learning_rate": 3.431145726038032e-07, "loss": 72.2729, "step": 110540 }, { "epoch": 0.9145055217768954, "grad_norm": 812.0310668945312, "learning_rate": 3.426013053692878e-07, "loss": 97.3781, "step": 110550 }, { "epoch": 0.9145882450262647, "grad_norm": 372.3507385253906, "learning_rate": 3.4208840870261326e-07, "loss": 81.1851, "step": 110560 }, { "epoch": 0.9146709682756339, "grad_norm": 1096.9320068359375, "learning_rate": 3.415758826445864e-07, "loss": 82.4156, "step": 110570 }, { "epoch": 0.9147536915250031, "grad_norm": 1314.9417724609375, "learning_rate": 3.410637272359868e-07, "loss": 97.9378, "step": 110580 }, { "epoch": 0.9148364147743724, "grad_norm": 835.7549438476562, "learning_rate": 3.405519425175652e-07, "loss": 110.182, "step": 110590 }, { "epoch": 0.9149191380237416, "grad_norm": 819.3499145507812, "learning_rate": 3.400405285300412e-07, "loss": 71.8763, "step": 110600 }, { "epoch": 0.9150018612731108, "grad_norm": 969.0990600585938, "learning_rate": 3.3952948531410566e-07, "loss": 73.1048, "step": 110610 }, { "epoch": 0.9150845845224801, "grad_norm": 1323.8681640625, "learning_rate": 3.390188129104205e-07, "loss": 111.2788, "step": 110620 }, { "epoch": 0.9151673077718493, "grad_norm": 1279.118408203125, "learning_rate": 3.3850851135961814e-07, "loss": 119.1711, "step": 110630 }, { "epoch": 0.9152500310212185, "grad_norm": 767.4146118164062, "learning_rate": 3.379985807023001e-07, "loss": 123.3025, "step": 110640 }, { "epoch": 0.9153327542705878, "grad_norm": 2297.595703125, "learning_rate": 3.3748902097903936e-07, "loss": 93.4889, "step": 110650 }, { "epoch": 0.915415477519957, "grad_norm": 851.3316040039062, "learning_rate": 3.369798322303786e-07, "loss": 81.7054, "step": 110660 }, { "epoch": 0.9154982007693262, "grad_norm": 671.036865234375, "learning_rate": 3.364710144968325e-07, "loss": 84.0587, "step": 110670 }, { "epoch": 0.9155809240186955, "grad_norm": 1349.0550537109375, "learning_rate": 3.359625678188849e-07, "loss": 91.475, "step": 110680 }, { "epoch": 0.9156636472680647, "grad_norm": 758.9757690429688, "learning_rate": 3.3545449223699065e-07, "loss": 79.7885, "step": 110690 }, { "epoch": 0.9157463705174339, "grad_norm": 755.66064453125, "learning_rate": 3.3494678779157464e-07, "loss": 81.5341, "step": 110700 }, { "epoch": 0.9158290937668032, "grad_norm": 864.9046630859375, "learning_rate": 3.3443945452303337e-07, "loss": 98.7411, "step": 110710 }, { "epoch": 0.9159118170161724, "grad_norm": 536.4815063476562, "learning_rate": 3.33932492471733e-07, "loss": 98.7579, "step": 110720 }, { "epoch": 0.9159945402655416, "grad_norm": 550.0960693359375, "learning_rate": 3.334259016780106e-07, "loss": 84.7401, "step": 110730 }, { "epoch": 0.9160772635149108, "grad_norm": 1865.9971923828125, "learning_rate": 3.3291968218217175e-07, "loss": 84.6278, "step": 110740 }, { "epoch": 0.9161599867642801, "grad_norm": 598.4445190429688, "learning_rate": 3.324138340244948e-07, "loss": 57.0423, "step": 110750 }, { "epoch": 0.9162427100136493, "grad_norm": 871.3580932617188, "learning_rate": 3.319083572452275e-07, "loss": 72.3384, "step": 110760 }, { "epoch": 0.9163254332630185, "grad_norm": 476.9991760253906, "learning_rate": 3.314032518845889e-07, "loss": 68.3785, "step": 110770 }, { "epoch": 0.9164081565123878, "grad_norm": 693.8629760742188, "learning_rate": 3.308985179827673e-07, "loss": 114.7213, "step": 110780 }, { "epoch": 0.916490879761757, "grad_norm": 797.1781616210938, "learning_rate": 3.303941555799223e-07, "loss": 86.4564, "step": 110790 }, { "epoch": 0.9165736030111262, "grad_norm": 313.2153015136719, "learning_rate": 3.298901647161834e-07, "loss": 91.0484, "step": 110800 }, { "epoch": 0.9166563262604955, "grad_norm": 865.4092407226562, "learning_rate": 3.293865454316514e-07, "loss": 105.9156, "step": 110810 }, { "epoch": 0.9167390495098647, "grad_norm": 1219.156005859375, "learning_rate": 3.2888329776639807e-07, "loss": 95.9394, "step": 110820 }, { "epoch": 0.9168217727592339, "grad_norm": 866.1614379882812, "learning_rate": 3.283804217604619e-07, "loss": 96.5862, "step": 110830 }, { "epoch": 0.9169044960086032, "grad_norm": 650.52490234375, "learning_rate": 3.27877917453856e-07, "loss": 89.6044, "step": 110840 }, { "epoch": 0.9169872192579724, "grad_norm": 1170.3040771484375, "learning_rate": 3.273757848865622e-07, "loss": 94.1275, "step": 110850 }, { "epoch": 0.9170699425073416, "grad_norm": 1928.0880126953125, "learning_rate": 3.2687402409853243e-07, "loss": 109.4509, "step": 110860 }, { "epoch": 0.917152665756711, "grad_norm": 754.0807495117188, "learning_rate": 3.2637263512969033e-07, "loss": 69.4093, "step": 110870 }, { "epoch": 0.9172353890060801, "grad_norm": 695.7767333984375, "learning_rate": 3.258716180199278e-07, "loss": 82.0896, "step": 110880 }, { "epoch": 0.9173181122554493, "grad_norm": 1156.6104736328125, "learning_rate": 3.253709728091098e-07, "loss": 72.8865, "step": 110890 }, { "epoch": 0.9174008355048187, "grad_norm": 551.685546875, "learning_rate": 3.2487069953706983e-07, "loss": 74.3535, "step": 110900 }, { "epoch": 0.9174835587541879, "grad_norm": 657.1426391601562, "learning_rate": 3.243707982436123e-07, "loss": 67.814, "step": 110910 }, { "epoch": 0.917566282003557, "grad_norm": 796.87109375, "learning_rate": 3.238712689685125e-07, "loss": 61.9698, "step": 110920 }, { "epoch": 0.9176490052529264, "grad_norm": 1190.6385498046875, "learning_rate": 3.2337211175151484e-07, "loss": 91.3083, "step": 110930 }, { "epoch": 0.9177317285022956, "grad_norm": 977.4860229492188, "learning_rate": 3.2287332663233527e-07, "loss": 74.0361, "step": 110940 }, { "epoch": 0.9178144517516648, "grad_norm": 757.3435668945312, "learning_rate": 3.223749136506604e-07, "loss": 80.6914, "step": 110950 }, { "epoch": 0.9178971750010341, "grad_norm": 827.2467041015625, "learning_rate": 3.218768728461458e-07, "loss": 111.4018, "step": 110960 }, { "epoch": 0.9179798982504033, "grad_norm": 609.2623291015625, "learning_rate": 3.2137920425841907e-07, "loss": 99.3807, "step": 110970 }, { "epoch": 0.9180626214997725, "grad_norm": 1110.6324462890625, "learning_rate": 3.2088190792707696e-07, "loss": 80.4231, "step": 110980 }, { "epoch": 0.9181453447491418, "grad_norm": 689.8076782226562, "learning_rate": 3.2038498389168724e-07, "loss": 65.6498, "step": 110990 }, { "epoch": 0.918228067998511, "grad_norm": 941.228515625, "learning_rate": 3.1988843219178776e-07, "loss": 85.3875, "step": 111000 }, { "epoch": 0.9183107912478802, "grad_norm": 887.41845703125, "learning_rate": 3.193922528668869e-07, "loss": 69.6305, "step": 111010 }, { "epoch": 0.9183935144972495, "grad_norm": 713.7828369140625, "learning_rate": 3.188964459564636e-07, "loss": 81.9818, "step": 111020 }, { "epoch": 0.9184762377466187, "grad_norm": 539.0556030273438, "learning_rate": 3.184010114999664e-07, "loss": 83.2385, "step": 111030 }, { "epoch": 0.9185589609959879, "grad_norm": 917.137451171875, "learning_rate": 3.179059495368153e-07, "loss": 101.1152, "step": 111040 }, { "epoch": 0.9186416842453572, "grad_norm": 666.9295043945312, "learning_rate": 3.174112601064e-07, "loss": 64.8105, "step": 111050 }, { "epoch": 0.9187244074947264, "grad_norm": 525.5652465820312, "learning_rate": 3.1691694324808063e-07, "loss": 69.7607, "step": 111060 }, { "epoch": 0.9188071307440956, "grad_norm": 1084.0616455078125, "learning_rate": 3.1642299900118743e-07, "loss": 113.7904, "step": 111070 }, { "epoch": 0.9188898539934649, "grad_norm": 1473.717041015625, "learning_rate": 3.159294274050212e-07, "loss": 114.3284, "step": 111080 }, { "epoch": 0.9189725772428341, "grad_norm": 789.0966186523438, "learning_rate": 3.154362284988538e-07, "loss": 85.0554, "step": 111090 }, { "epoch": 0.9190553004922033, "grad_norm": 970.8892211914062, "learning_rate": 3.1494340232192667e-07, "loss": 77.95, "step": 111100 }, { "epoch": 0.9191380237415726, "grad_norm": 856.5325927734375, "learning_rate": 3.144509489134512e-07, "loss": 85.2512, "step": 111110 }, { "epoch": 0.9192207469909418, "grad_norm": 685.9720458984375, "learning_rate": 3.139588683126099e-07, "loss": 74.0177, "step": 111120 }, { "epoch": 0.919303470240311, "grad_norm": 782.5280151367188, "learning_rate": 3.134671605585554e-07, "loss": 73.0808, "step": 111130 }, { "epoch": 0.9193861934896803, "grad_norm": 421.8550720214844, "learning_rate": 3.129758256904109e-07, "loss": 67.5404, "step": 111140 }, { "epoch": 0.9194689167390495, "grad_norm": 855.1493530273438, "learning_rate": 3.1248486374726884e-07, "loss": 86.1914, "step": 111150 }, { "epoch": 0.9195516399884187, "grad_norm": 581.30029296875, "learning_rate": 3.119942747681948e-07, "loss": 95.9546, "step": 111160 }, { "epoch": 0.919634363237788, "grad_norm": 637.724853515625, "learning_rate": 3.1150405879221965e-07, "loss": 97.2694, "step": 111170 }, { "epoch": 0.9197170864871572, "grad_norm": 706.0328369140625, "learning_rate": 3.110142158583496e-07, "loss": 94.9882, "step": 111180 }, { "epoch": 0.9197998097365264, "grad_norm": 560.701416015625, "learning_rate": 3.1052474600555936e-07, "loss": 66.4736, "step": 111190 }, { "epoch": 0.9198825329858957, "grad_norm": 1017.1104125976562, "learning_rate": 3.100356492727929e-07, "loss": 84.1691, "step": 111200 }, { "epoch": 0.9199652562352649, "grad_norm": 745.77587890625, "learning_rate": 3.0954692569896585e-07, "loss": 68.3992, "step": 111210 }, { "epoch": 0.9200479794846341, "grad_norm": 499.56640625, "learning_rate": 3.0905857532296414e-07, "loss": 87.3954, "step": 111220 }, { "epoch": 0.9201307027340034, "grad_norm": 1444.1029052734375, "learning_rate": 3.085705981836423e-07, "loss": 81.4259, "step": 111230 }, { "epoch": 0.9202134259833726, "grad_norm": 508.9635314941406, "learning_rate": 3.080829943198277e-07, "loss": 89.5248, "step": 111240 }, { "epoch": 0.9202961492327418, "grad_norm": 996.2418212890625, "learning_rate": 3.0759576377031697e-07, "loss": 69.8985, "step": 111250 }, { "epoch": 0.9203788724821111, "grad_norm": 1550.9212646484375, "learning_rate": 3.071089065738747e-07, "loss": 69.4487, "step": 111260 }, { "epoch": 0.9204615957314803, "grad_norm": 1241.609130859375, "learning_rate": 3.0662242276923993e-07, "loss": 112.2663, "step": 111270 }, { "epoch": 0.9205443189808495, "grad_norm": 1453.5501708984375, "learning_rate": 3.061363123951189e-07, "loss": 87.4852, "step": 111280 }, { "epoch": 0.9206270422302189, "grad_norm": 792.2757568359375, "learning_rate": 3.0565057549019005e-07, "loss": 89.3375, "step": 111290 }, { "epoch": 0.920709765479588, "grad_norm": 365.10528564453125, "learning_rate": 3.051652120931003e-07, "loss": 77.1843, "step": 111300 }, { "epoch": 0.9207924887289572, "grad_norm": 1038.219482421875, "learning_rate": 3.0468022224246886e-07, "loss": 100.4181, "step": 111310 }, { "epoch": 0.9208752119783266, "grad_norm": 1549.9176025390625, "learning_rate": 3.04195605976883e-07, "loss": 101.2689, "step": 111320 }, { "epoch": 0.9209579352276958, "grad_norm": 1094.44873046875, "learning_rate": 3.0371136333490315e-07, "loss": 62.7427, "step": 111330 }, { "epoch": 0.921040658477065, "grad_norm": 764.5133056640625, "learning_rate": 3.0322749435505563e-07, "loss": 117.9781, "step": 111340 }, { "epoch": 0.9211233817264343, "grad_norm": 922.7494506835938, "learning_rate": 3.027439990758418e-07, "loss": 87.756, "step": 111350 }, { "epoch": 0.9212061049758035, "grad_norm": 2155.5322265625, "learning_rate": 3.022608775357294e-07, "loss": 109.2952, "step": 111360 }, { "epoch": 0.9212888282251727, "grad_norm": 639.3640747070312, "learning_rate": 3.017781297731598e-07, "loss": 65.4102, "step": 111370 }, { "epoch": 0.921371551474542, "grad_norm": 401.3136901855469, "learning_rate": 3.012957558265428e-07, "loss": 55.7189, "step": 111380 }, { "epoch": 0.9214542747239112, "grad_norm": 815.2987060546875, "learning_rate": 3.008137557342583e-07, "loss": 89.766, "step": 111390 }, { "epoch": 0.9215369979732804, "grad_norm": 848.1900024414062, "learning_rate": 3.003321295346573e-07, "loss": 70.3423, "step": 111400 }, { "epoch": 0.9216197212226497, "grad_norm": 1112.6951904296875, "learning_rate": 2.9985087726605965e-07, "loss": 96.3883, "step": 111410 }, { "epoch": 0.9217024444720189, "grad_norm": 863.7384643554688, "learning_rate": 2.9936999896675757e-07, "loss": 77.8842, "step": 111420 }, { "epoch": 0.9217851677213881, "grad_norm": 549.550537109375, "learning_rate": 2.988894946750115e-07, "loss": 97.5944, "step": 111430 }, { "epoch": 0.9218678909707574, "grad_norm": 348.6497497558594, "learning_rate": 2.9840936442905253e-07, "loss": 81.6903, "step": 111440 }, { "epoch": 0.9219506142201266, "grad_norm": 1831.6419677734375, "learning_rate": 2.979296082670835e-07, "loss": 112.7743, "step": 111450 }, { "epoch": 0.9220333374694958, "grad_norm": 490.64111328125, "learning_rate": 2.974502262272749e-07, "loss": 80.2675, "step": 111460 }, { "epoch": 0.922116060718865, "grad_norm": 768.78076171875, "learning_rate": 2.969712183477713e-07, "loss": 74.6032, "step": 111470 }, { "epoch": 0.9221987839682343, "grad_norm": 886.2664184570312, "learning_rate": 2.964925846666833e-07, "loss": 52.6146, "step": 111480 }, { "epoch": 0.9222815072176035, "grad_norm": 648.3212280273438, "learning_rate": 2.960143252220943e-07, "loss": 88.3323, "step": 111490 }, { "epoch": 0.9223642304669727, "grad_norm": 646.9230346679688, "learning_rate": 2.955364400520583e-07, "loss": 85.1518, "step": 111500 }, { "epoch": 0.922446953716342, "grad_norm": 783.1369018554688, "learning_rate": 2.950589291945954e-07, "loss": 83.9216, "step": 111510 }, { "epoch": 0.9225296769657112, "grad_norm": 624.5678100585938, "learning_rate": 2.9458179268770147e-07, "loss": 67.9207, "step": 111520 }, { "epoch": 0.9226124002150804, "grad_norm": 933.9120483398438, "learning_rate": 2.941050305693394e-07, "loss": 96.648, "step": 111530 }, { "epoch": 0.9226951234644497, "grad_norm": 759.1103515625, "learning_rate": 2.9362864287744266e-07, "loss": 79.1249, "step": 111540 }, { "epoch": 0.9227778467138189, "grad_norm": 709.6221923828125, "learning_rate": 2.931526296499154e-07, "loss": 75.1609, "step": 111550 }, { "epoch": 0.9228605699631881, "grad_norm": 1090.783203125, "learning_rate": 2.926769909246313e-07, "loss": 90.4893, "step": 111560 }, { "epoch": 0.9229432932125574, "grad_norm": 1291.6949462890625, "learning_rate": 2.9220172673943615e-07, "loss": 121.0514, "step": 111570 }, { "epoch": 0.9230260164619266, "grad_norm": 1319.6103515625, "learning_rate": 2.9172683713214354e-07, "loss": 81.1168, "step": 111580 }, { "epoch": 0.9231087397112958, "grad_norm": 887.0220947265625, "learning_rate": 2.9125232214053946e-07, "loss": 77.7259, "step": 111590 }, { "epoch": 0.9231914629606651, "grad_norm": 823.8740234375, "learning_rate": 2.9077818180237693e-07, "loss": 83.7634, "step": 111600 }, { "epoch": 0.9232741862100343, "grad_norm": 1067.777099609375, "learning_rate": 2.90304416155382e-07, "loss": 77.3156, "step": 111610 }, { "epoch": 0.9233569094594035, "grad_norm": 533.4111328125, "learning_rate": 2.898310252372505e-07, "loss": 81.9363, "step": 111620 }, { "epoch": 0.9234396327087728, "grad_norm": 800.60888671875, "learning_rate": 2.893580090856474e-07, "loss": 112.345, "step": 111630 }, { "epoch": 0.923522355958142, "grad_norm": 962.5048217773438, "learning_rate": 2.888853677382081e-07, "loss": 88.3361, "step": 111640 }, { "epoch": 0.9236050792075112, "grad_norm": 889.5928344726562, "learning_rate": 2.8841310123253865e-07, "loss": 81.9884, "step": 111650 }, { "epoch": 0.9236878024568805, "grad_norm": 697.1818237304688, "learning_rate": 2.879412096062162e-07, "loss": 121.3172, "step": 111660 }, { "epoch": 0.9237705257062497, "grad_norm": 667.9248657226562, "learning_rate": 2.874696928967863e-07, "loss": 97.8055, "step": 111670 }, { "epoch": 0.9238532489556189, "grad_norm": 442.2999572753906, "learning_rate": 2.869985511417661e-07, "loss": 54.4177, "step": 111680 }, { "epoch": 0.9239359722049882, "grad_norm": 686.0079956054688, "learning_rate": 2.8652778437864016e-07, "loss": 87.2172, "step": 111690 }, { "epoch": 0.9240186954543574, "grad_norm": 684.615966796875, "learning_rate": 2.8605739264486733e-07, "loss": 89.1697, "step": 111700 }, { "epoch": 0.9241014187037266, "grad_norm": 1551.7012939453125, "learning_rate": 2.8558737597787324e-07, "loss": 115.9352, "step": 111710 }, { "epoch": 0.924184141953096, "grad_norm": 1213.67236328125, "learning_rate": 2.851177344150552e-07, "loss": 97.0064, "step": 111720 }, { "epoch": 0.9242668652024651, "grad_norm": 768.0172729492188, "learning_rate": 2.846484679937811e-07, "loss": 81.57, "step": 111730 }, { "epoch": 0.9243495884518343, "grad_norm": 486.9347229003906, "learning_rate": 2.841795767513877e-07, "loss": 72.622, "step": 111740 }, { "epoch": 0.9244323117012037, "grad_norm": 799.0153198242188, "learning_rate": 2.8371106072518194e-07, "loss": 95.9192, "step": 111750 }, { "epoch": 0.9245150349505729, "grad_norm": 778.710205078125, "learning_rate": 2.8324291995244333e-07, "loss": 74.0421, "step": 111760 }, { "epoch": 0.924597758199942, "grad_norm": 1426.642822265625, "learning_rate": 2.8277515447041827e-07, "loss": 101.7019, "step": 111770 }, { "epoch": 0.9246804814493114, "grad_norm": 934.9050903320312, "learning_rate": 2.8230776431632523e-07, "loss": 93.3299, "step": 111780 }, { "epoch": 0.9247632046986806, "grad_norm": 839.4944458007812, "learning_rate": 2.8184074952735176e-07, "loss": 76.1551, "step": 111790 }, { "epoch": 0.9248459279480498, "grad_norm": 733.8991088867188, "learning_rate": 2.813741101406564e-07, "loss": 83.5848, "step": 111800 }, { "epoch": 0.9249286511974191, "grad_norm": 1104.2376708984375, "learning_rate": 2.8090784619336784e-07, "loss": 109.4974, "step": 111810 }, { "epoch": 0.9250113744467883, "grad_norm": 541.585205078125, "learning_rate": 2.804419577225842e-07, "loss": 99.5181, "step": 111820 }, { "epoch": 0.9250940976961575, "grad_norm": 1171.6307373046875, "learning_rate": 2.7997644476537353e-07, "loss": 105.5711, "step": 111830 }, { "epoch": 0.9251768209455268, "grad_norm": 1111.163818359375, "learning_rate": 2.7951130735877576e-07, "loss": 100.552, "step": 111840 }, { "epoch": 0.925259544194896, "grad_norm": 465.4991149902344, "learning_rate": 2.790465455397989e-07, "loss": 91.685, "step": 111850 }, { "epoch": 0.9253422674442652, "grad_norm": 768.08740234375, "learning_rate": 2.785821593454224e-07, "loss": 105.3449, "step": 111860 }, { "epoch": 0.9254249906936345, "grad_norm": 497.09466552734375, "learning_rate": 2.7811814881259503e-07, "loss": 67.444, "step": 111870 }, { "epoch": 0.9255077139430037, "grad_norm": 693.8814697265625, "learning_rate": 2.776545139782361e-07, "loss": 61.2311, "step": 111880 }, { "epoch": 0.9255904371923729, "grad_norm": 892.4473266601562, "learning_rate": 2.771912548792344e-07, "loss": 87.7277, "step": 111890 }, { "epoch": 0.9256731604417422, "grad_norm": 1138.6724853515625, "learning_rate": 2.767283715524505e-07, "loss": 122.5125, "step": 111900 }, { "epoch": 0.9257558836911114, "grad_norm": 755.6520385742188, "learning_rate": 2.762658640347127e-07, "loss": 69.8308, "step": 111910 }, { "epoch": 0.9258386069404806, "grad_norm": 983.39990234375, "learning_rate": 2.75803732362821e-07, "loss": 58.8145, "step": 111920 }, { "epoch": 0.9259213301898499, "grad_norm": 895.5469360351562, "learning_rate": 2.7534197657354543e-07, "loss": 91.3707, "step": 111930 }, { "epoch": 0.9260040534392191, "grad_norm": 1143.3978271484375, "learning_rate": 2.7488059670362553e-07, "loss": 80.0223, "step": 111940 }, { "epoch": 0.9260867766885883, "grad_norm": 666.8565673828125, "learning_rate": 2.744195927897708e-07, "loss": 119.4018, "step": 111950 }, { "epoch": 0.9261694999379576, "grad_norm": 1519.14404296875, "learning_rate": 2.7395896486866193e-07, "loss": 69.0608, "step": 111960 }, { "epoch": 0.9262522231873268, "grad_norm": 1167.140625, "learning_rate": 2.7349871297694895e-07, "loss": 95.0523, "step": 111970 }, { "epoch": 0.926334946436696, "grad_norm": 715.0806274414062, "learning_rate": 2.73038837151251e-07, "loss": 73.4228, "step": 111980 }, { "epoch": 0.9264176696860653, "grad_norm": 602.8383178710938, "learning_rate": 2.725793374281593e-07, "loss": 103.7122, "step": 111990 }, { "epoch": 0.9265003929354345, "grad_norm": 575.5548706054688, "learning_rate": 2.7212021384423415e-07, "loss": 59.8035, "step": 112000 }, { "epoch": 0.9265831161848037, "grad_norm": 687.127197265625, "learning_rate": 2.716614664360051e-07, "loss": 53.2006, "step": 112010 }, { "epoch": 0.926665839434173, "grad_norm": 912.4276123046875, "learning_rate": 2.71203095239973e-07, "loss": 71.3291, "step": 112020 }, { "epoch": 0.9267485626835422, "grad_norm": 720.695068359375, "learning_rate": 2.7074510029260814e-07, "loss": 92.2435, "step": 112030 }, { "epoch": 0.9268312859329114, "grad_norm": 881.0599975585938, "learning_rate": 2.702874816303519e-07, "loss": 83.2978, "step": 112040 }, { "epoch": 0.9269140091822807, "grad_norm": 1031.5977783203125, "learning_rate": 2.6983023928961406e-07, "loss": 91.2726, "step": 112050 }, { "epoch": 0.9269967324316499, "grad_norm": 976.7826538085938, "learning_rate": 2.6937337330677547e-07, "loss": 74.995, "step": 112060 }, { "epoch": 0.9270794556810191, "grad_norm": 916.8348388671875, "learning_rate": 2.689168837181866e-07, "loss": 101.2523, "step": 112070 }, { "epoch": 0.9271621789303884, "grad_norm": 889.0336303710938, "learning_rate": 2.684607705601688e-07, "loss": 84.4417, "step": 112080 }, { "epoch": 0.9272449021797576, "grad_norm": 1171.827392578125, "learning_rate": 2.680050338690132e-07, "loss": 133.0743, "step": 112090 }, { "epoch": 0.9273276254291268, "grad_norm": 868.6085205078125, "learning_rate": 2.675496736809796e-07, "loss": 84.3478, "step": 112100 }, { "epoch": 0.9274103486784961, "grad_norm": 571.0438842773438, "learning_rate": 2.6709469003230013e-07, "loss": 91.2716, "step": 112110 }, { "epoch": 0.9274930719278653, "grad_norm": 457.09393310546875, "learning_rate": 2.6664008295917364e-07, "loss": 72.2116, "step": 112120 }, { "epoch": 0.9275757951772345, "grad_norm": 1358.0347900390625, "learning_rate": 2.661858524977734e-07, "loss": 93.3441, "step": 112130 }, { "epoch": 0.9276585184266039, "grad_norm": 667.2669067382812, "learning_rate": 2.657319986842394e-07, "loss": 79.6386, "step": 112140 }, { "epoch": 0.927741241675973, "grad_norm": 1060.2071533203125, "learning_rate": 2.6527852155468327e-07, "loss": 94.3927, "step": 112150 }, { "epoch": 0.9278239649253422, "grad_norm": 1009.9804077148438, "learning_rate": 2.648254211451856e-07, "loss": 63.4351, "step": 112160 }, { "epoch": 0.9279066881747116, "grad_norm": 910.8653564453125, "learning_rate": 2.643726974917976e-07, "loss": 94.569, "step": 112170 }, { "epoch": 0.9279894114240808, "grad_norm": 451.7293701171875, "learning_rate": 2.6392035063054043e-07, "loss": 98.4698, "step": 112180 }, { "epoch": 0.92807213467345, "grad_norm": 976.0929565429688, "learning_rate": 2.634683805974059e-07, "loss": 69.7292, "step": 112190 }, { "epoch": 0.9281548579228192, "grad_norm": 999.5797119140625, "learning_rate": 2.63016787428354e-07, "loss": 100.5, "step": 112200 }, { "epoch": 0.9282375811721885, "grad_norm": 620.88623046875, "learning_rate": 2.6256557115931613e-07, "loss": 76.7702, "step": 112210 }, { "epoch": 0.9283203044215577, "grad_norm": 725.8551635742188, "learning_rate": 2.6211473182619405e-07, "loss": 74.561, "step": 112220 }, { "epoch": 0.9284030276709269, "grad_norm": 1008.1248168945312, "learning_rate": 2.616642694648591e-07, "loss": 58.9289, "step": 112230 }, { "epoch": 0.9284857509202962, "grad_norm": 1029.631591796875, "learning_rate": 2.612141841111521e-07, "loss": 83.1139, "step": 112240 }, { "epoch": 0.9285684741696654, "grad_norm": 921.6142578125, "learning_rate": 2.6076447580088426e-07, "loss": 60.3248, "step": 112250 }, { "epoch": 0.9286511974190346, "grad_norm": 684.411376953125, "learning_rate": 2.603151445698371e-07, "loss": 82.2432, "step": 112260 }, { "epoch": 0.9287339206684039, "grad_norm": 1127.3673095703125, "learning_rate": 2.598661904537619e-07, "loss": 102.584, "step": 112270 }, { "epoch": 0.9288166439177731, "grad_norm": 949.6649780273438, "learning_rate": 2.5941761348837966e-07, "loss": 110.4898, "step": 112280 }, { "epoch": 0.9288993671671423, "grad_norm": 1033.648193359375, "learning_rate": 2.5896941370938177e-07, "loss": 92.9099, "step": 112290 }, { "epoch": 0.9289820904165116, "grad_norm": 798.4220581054688, "learning_rate": 2.5852159115242857e-07, "loss": 71.5822, "step": 112300 }, { "epoch": 0.9290648136658808, "grad_norm": 766.9046630859375, "learning_rate": 2.580741458531516e-07, "loss": 79.5017, "step": 112310 }, { "epoch": 0.92914753691525, "grad_norm": 547.7767944335938, "learning_rate": 2.576270778471529e-07, "loss": 78.4615, "step": 112320 }, { "epoch": 0.9292302601646193, "grad_norm": 636.5359497070312, "learning_rate": 2.571803871700029e-07, "loss": 64.6448, "step": 112330 }, { "epoch": 0.9293129834139885, "grad_norm": 1199.5556640625, "learning_rate": 2.567340738572427e-07, "loss": 102.5773, "step": 112340 }, { "epoch": 0.9293957066633577, "grad_norm": 1170.7523193359375, "learning_rate": 2.5628813794438434e-07, "loss": 80.3686, "step": 112350 }, { "epoch": 0.929478429912727, "grad_norm": 1347.653564453125, "learning_rate": 2.5584257946690836e-07, "loss": 81.7604, "step": 112360 }, { "epoch": 0.9295611531620962, "grad_norm": 786.822509765625, "learning_rate": 2.553973984602648e-07, "loss": 84.814, "step": 112370 }, { "epoch": 0.9296438764114654, "grad_norm": 1360.4071044921875, "learning_rate": 2.5495259495987576e-07, "loss": 107.1304, "step": 112380 }, { "epoch": 0.9297265996608347, "grad_norm": 1530.94873046875, "learning_rate": 2.545081690011314e-07, "loss": 98.4278, "step": 112390 }, { "epoch": 0.9298093229102039, "grad_norm": 977.4533081054688, "learning_rate": 2.540641206193939e-07, "loss": 78.7725, "step": 112400 }, { "epoch": 0.9298920461595731, "grad_norm": 600.151611328125, "learning_rate": 2.536204498499922e-07, "loss": 96.0168, "step": 112410 }, { "epoch": 0.9299747694089424, "grad_norm": 640.1029052734375, "learning_rate": 2.5317715672822927e-07, "loss": 85.9019, "step": 112420 }, { "epoch": 0.9300574926583116, "grad_norm": 747.0780029296875, "learning_rate": 2.527342412893746e-07, "loss": 62.0823, "step": 112430 }, { "epoch": 0.9301402159076808, "grad_norm": 1375.25146484375, "learning_rate": 2.5229170356867005e-07, "loss": 96.1561, "step": 112440 }, { "epoch": 0.9302229391570501, "grad_norm": 782.2132568359375, "learning_rate": 2.5184954360132574e-07, "loss": 89.7107, "step": 112450 }, { "epoch": 0.9303056624064193, "grad_norm": 2209.59521484375, "learning_rate": 2.514077614225219e-07, "loss": 105.6186, "step": 112460 }, { "epoch": 0.9303883856557885, "grad_norm": 583.348388671875, "learning_rate": 2.509663570674087e-07, "loss": 80.0686, "step": 112470 }, { "epoch": 0.9304711089051578, "grad_norm": 459.78302001953125, "learning_rate": 2.5052533057110805e-07, "loss": 93.2552, "step": 112480 }, { "epoch": 0.930553832154527, "grad_norm": 617.0430908203125, "learning_rate": 2.500846819687097e-07, "loss": 93.8821, "step": 112490 }, { "epoch": 0.9306365554038962, "grad_norm": 0.0, "learning_rate": 2.4964441129527337e-07, "loss": 83.9168, "step": 112500 }, { "epoch": 0.9307192786532655, "grad_norm": 957.689453125, "learning_rate": 2.4920451858582997e-07, "loss": 81.6168, "step": 112510 }, { "epoch": 0.9308020019026347, "grad_norm": 1347.580810546875, "learning_rate": 2.487650038753803e-07, "loss": 84.5013, "step": 112520 }, { "epoch": 0.9308847251520039, "grad_norm": 840.2437744140625, "learning_rate": 2.483258671988942e-07, "loss": 90.0643, "step": 112530 }, { "epoch": 0.9309674484013732, "grad_norm": 803.8649291992188, "learning_rate": 2.4788710859131203e-07, "loss": 95.1621, "step": 112540 }, { "epoch": 0.9310501716507424, "grad_norm": 775.64599609375, "learning_rate": 2.474487280875426e-07, "loss": 82.0533, "step": 112550 }, { "epoch": 0.9311328949001116, "grad_norm": 648.1033325195312, "learning_rate": 2.470107257224669e-07, "loss": 69.88, "step": 112560 }, { "epoch": 0.931215618149481, "grad_norm": 769.5602416992188, "learning_rate": 2.4657310153093475e-07, "loss": 101.4659, "step": 112570 }, { "epoch": 0.9312983413988501, "grad_norm": 654.4227905273438, "learning_rate": 2.46135855547765e-07, "loss": 79.7583, "step": 112580 }, { "epoch": 0.9313810646482193, "grad_norm": 909.7418823242188, "learning_rate": 2.4569898780774816e-07, "loss": 77.9152, "step": 112590 }, { "epoch": 0.9314637878975887, "grad_norm": 475.4342041015625, "learning_rate": 2.452624983456431e-07, "loss": 87.7683, "step": 112600 }, { "epoch": 0.9315465111469579, "grad_norm": 750.6339111328125, "learning_rate": 2.4482638719618037e-07, "loss": 95.7583, "step": 112610 }, { "epoch": 0.931629234396327, "grad_norm": 719.8361206054688, "learning_rate": 2.443906543940594e-07, "loss": 89.19, "step": 112620 }, { "epoch": 0.9317119576456964, "grad_norm": 788.4232788085938, "learning_rate": 2.439552999739475e-07, "loss": 64.0623, "step": 112630 }, { "epoch": 0.9317946808950656, "grad_norm": 1180.428955078125, "learning_rate": 2.4352032397048584e-07, "loss": 77.4437, "step": 112640 }, { "epoch": 0.9318774041444348, "grad_norm": 702.971923828125, "learning_rate": 2.4308572641828234e-07, "loss": 86.0467, "step": 112650 }, { "epoch": 0.9319601273938041, "grad_norm": 467.1812438964844, "learning_rate": 2.4265150735191644e-07, "loss": 76.0462, "step": 112660 }, { "epoch": 0.9320428506431733, "grad_norm": 1010.5810546875, "learning_rate": 2.422176668059367e-07, "loss": 71.2109, "step": 112670 }, { "epoch": 0.9321255738925425, "grad_norm": 1030.048583984375, "learning_rate": 2.417842048148622e-07, "loss": 98.5793, "step": 112680 }, { "epoch": 0.9322082971419118, "grad_norm": 608.4972534179688, "learning_rate": 2.4135112141318084e-07, "loss": 86.8852, "step": 112690 }, { "epoch": 0.932291020391281, "grad_norm": 1069.7056884765625, "learning_rate": 2.409184166353512e-07, "loss": 73.6224, "step": 112700 }, { "epoch": 0.9323737436406502, "grad_norm": 620.8689575195312, "learning_rate": 2.4048609051580295e-07, "loss": 73.3166, "step": 112710 }, { "epoch": 0.9324564668900195, "grad_norm": 1390.774169921875, "learning_rate": 2.4005414308893304e-07, "loss": 93.2582, "step": 112720 }, { "epoch": 0.9325391901393887, "grad_norm": 1095.233642578125, "learning_rate": 2.396225743891095e-07, "loss": 79.1661, "step": 112730 }, { "epoch": 0.9326219133887579, "grad_norm": 651.0794677734375, "learning_rate": 2.3919138445067045e-07, "loss": 74.3248, "step": 112740 }, { "epoch": 0.9327046366381272, "grad_norm": 694.2014770507812, "learning_rate": 2.3876057330792344e-07, "loss": 72.9786, "step": 112750 }, { "epoch": 0.9327873598874964, "grad_norm": 845.7319946289062, "learning_rate": 2.3833014099514716e-07, "loss": 67.2363, "step": 112760 }, { "epoch": 0.9328700831368656, "grad_norm": 579.4317016601562, "learning_rate": 2.3790008754658811e-07, "loss": 75.9026, "step": 112770 }, { "epoch": 0.9329528063862349, "grad_norm": 1336.7401123046875, "learning_rate": 2.3747041299646389e-07, "loss": 100.3919, "step": 112780 }, { "epoch": 0.9330355296356041, "grad_norm": 782.5462646484375, "learning_rate": 2.3704111737896218e-07, "loss": 66.6842, "step": 112790 }, { "epoch": 0.9331182528849733, "grad_norm": 1084.5948486328125, "learning_rate": 2.3661220072823953e-07, "loss": 86.8441, "step": 112800 }, { "epoch": 0.9332009761343426, "grad_norm": 873.84228515625, "learning_rate": 2.3618366307842312e-07, "loss": 80.4352, "step": 112810 }, { "epoch": 0.9332836993837118, "grad_norm": 935.0689697265625, "learning_rate": 2.3575550446360952e-07, "loss": 74.7977, "step": 112820 }, { "epoch": 0.933366422633081, "grad_norm": 1264.7552490234375, "learning_rate": 2.3532772491786537e-07, "loss": 62.4919, "step": 112830 }, { "epoch": 0.9334491458824503, "grad_norm": 760.6544799804688, "learning_rate": 2.3490032447522792e-07, "loss": 78.2148, "step": 112840 }, { "epoch": 0.9335318691318195, "grad_norm": 3588.122314453125, "learning_rate": 2.3447330316970218e-07, "loss": 120.6314, "step": 112850 }, { "epoch": 0.9336145923811887, "grad_norm": 653.4447021484375, "learning_rate": 2.3404666103526542e-07, "loss": 97.4798, "step": 112860 }, { "epoch": 0.933697315630558, "grad_norm": 705.934814453125, "learning_rate": 2.3362039810586267e-07, "loss": 94.1974, "step": 112870 }, { "epoch": 0.9337800388799272, "grad_norm": 616.7807006835938, "learning_rate": 2.3319451441541018e-07, "loss": 67.858, "step": 112880 }, { "epoch": 0.9338627621292964, "grad_norm": 934.6239624023438, "learning_rate": 2.3276900999779305e-07, "loss": 85.3062, "step": 112890 }, { "epoch": 0.9339454853786656, "grad_norm": 563.1575317382812, "learning_rate": 2.323438848868681e-07, "loss": 75.0296, "step": 112900 }, { "epoch": 0.9340282086280349, "grad_norm": 1033.3599853515625, "learning_rate": 2.319191391164588e-07, "loss": 86.1594, "step": 112910 }, { "epoch": 0.9341109318774041, "grad_norm": 551.6663818359375, "learning_rate": 2.3149477272036146e-07, "loss": 72.7008, "step": 112920 }, { "epoch": 0.9341936551267733, "grad_norm": 884.0999755859375, "learning_rate": 2.3107078573234077e-07, "loss": 88.6581, "step": 112930 }, { "epoch": 0.9342763783761426, "grad_norm": 766.043701171875, "learning_rate": 2.306471781861308e-07, "loss": 75.3607, "step": 112940 }, { "epoch": 0.9343591016255118, "grad_norm": 6711.1845703125, "learning_rate": 2.3022395011543687e-07, "loss": 123.3523, "step": 112950 }, { "epoch": 0.934441824874881, "grad_norm": 771.6279296875, "learning_rate": 2.2980110155393253e-07, "loss": 97.1575, "step": 112960 }, { "epoch": 0.9345245481242503, "grad_norm": 1057.9940185546875, "learning_rate": 2.293786325352626e-07, "loss": 113.2081, "step": 112970 }, { "epoch": 0.9346072713736195, "grad_norm": 1071.353271484375, "learning_rate": 2.289565430930407e-07, "loss": 87.6715, "step": 112980 }, { "epoch": 0.9346899946229887, "grad_norm": 862.3544921875, "learning_rate": 2.2853483326085002e-07, "loss": 79.9122, "step": 112990 }, { "epoch": 0.934772717872358, "grad_norm": 895.1743774414062, "learning_rate": 2.2811350307224534e-07, "loss": 113.0911, "step": 113000 }, { "epoch": 0.9348554411217272, "grad_norm": 753.2420654296875, "learning_rate": 2.2769255256074874e-07, "loss": 84.552, "step": 113010 }, { "epoch": 0.9349381643710964, "grad_norm": 1342.393798828125, "learning_rate": 2.27271981759854e-07, "loss": 97.5154, "step": 113020 }, { "epoch": 0.9350208876204658, "grad_norm": 1412.8348388671875, "learning_rate": 2.2685179070302377e-07, "loss": 82.4936, "step": 113030 }, { "epoch": 0.935103610869835, "grad_norm": 668.9525756835938, "learning_rate": 2.2643197942369022e-07, "loss": 94.1694, "step": 113040 }, { "epoch": 0.9351863341192042, "grad_norm": 772.8831176757812, "learning_rate": 2.2601254795525774e-07, "loss": 81.6293, "step": 113050 }, { "epoch": 0.9352690573685735, "grad_norm": 1103.180908203125, "learning_rate": 2.2559349633109629e-07, "loss": 83.7612, "step": 113060 }, { "epoch": 0.9353517806179427, "grad_norm": 819.5826416015625, "learning_rate": 2.2517482458454808e-07, "loss": 88.8579, "step": 113070 }, { "epoch": 0.9354345038673119, "grad_norm": 2683.924072265625, "learning_rate": 2.2475653274892594e-07, "loss": 102.4598, "step": 113080 }, { "epoch": 0.9355172271166812, "grad_norm": 808.376220703125, "learning_rate": 2.2433862085751157e-07, "loss": 86.0561, "step": 113090 }, { "epoch": 0.9355999503660504, "grad_norm": 618.5714111328125, "learning_rate": 2.2392108894355557e-07, "loss": 45.9369, "step": 113100 }, { "epoch": 0.9356826736154196, "grad_norm": 823.1011962890625, "learning_rate": 2.2350393704027917e-07, "loss": 98.3395, "step": 113110 }, { "epoch": 0.9357653968647889, "grad_norm": 572.7033081054688, "learning_rate": 2.230871651808736e-07, "loss": 94.4214, "step": 113120 }, { "epoch": 0.9358481201141581, "grad_norm": 2472.20947265625, "learning_rate": 2.226707733984995e-07, "loss": 115.9153, "step": 113130 }, { "epoch": 0.9359308433635273, "grad_norm": 665.7183837890625, "learning_rate": 2.2225476172628714e-07, "loss": 60.923, "step": 113140 }, { "epoch": 0.9360135666128966, "grad_norm": 926.3880615234375, "learning_rate": 2.2183913019733605e-07, "loss": 104.7108, "step": 113150 }, { "epoch": 0.9360962898622658, "grad_norm": 866.1433715820312, "learning_rate": 2.2142387884471593e-07, "loss": 74.1606, "step": 113160 }, { "epoch": 0.936179013111635, "grad_norm": 2463.356201171875, "learning_rate": 2.210090077014676e-07, "loss": 73.1271, "step": 113170 }, { "epoch": 0.9362617363610043, "grad_norm": 2465.159912109375, "learning_rate": 2.2059451680059962e-07, "loss": 136.0056, "step": 113180 }, { "epoch": 0.9363444596103735, "grad_norm": 1126.7469482421875, "learning_rate": 2.2018040617509174e-07, "loss": 108.5757, "step": 113190 }, { "epoch": 0.9364271828597427, "grad_norm": 704.0745849609375, "learning_rate": 2.1976667585789257e-07, "loss": 93.8709, "step": 113200 }, { "epoch": 0.936509906109112, "grad_norm": 623.0414428710938, "learning_rate": 2.193533258819208e-07, "loss": 100.2674, "step": 113210 }, { "epoch": 0.9365926293584812, "grad_norm": 956.225830078125, "learning_rate": 2.1894035628006517e-07, "loss": 93.8058, "step": 113220 }, { "epoch": 0.9366753526078504, "grad_norm": 1069.036376953125, "learning_rate": 2.1852776708518265e-07, "loss": 84.0381, "step": 113230 }, { "epoch": 0.9367580758572197, "grad_norm": 1028.188720703125, "learning_rate": 2.18115558330102e-07, "loss": 62.8961, "step": 113240 }, { "epoch": 0.9368407991065889, "grad_norm": 656.3768310546875, "learning_rate": 2.1770373004762035e-07, "loss": 98.3898, "step": 113250 }, { "epoch": 0.9369235223559581, "grad_norm": 1091.6727294921875, "learning_rate": 2.1729228227050426e-07, "loss": 91.2749, "step": 113260 }, { "epoch": 0.9370062456053274, "grad_norm": 1016.352783203125, "learning_rate": 2.1688121503149195e-07, "loss": 81.7884, "step": 113270 }, { "epoch": 0.9370889688546966, "grad_norm": 699.8380737304688, "learning_rate": 2.1647052836329065e-07, "loss": 81.0783, "step": 113280 }, { "epoch": 0.9371716921040658, "grad_norm": 887.7781372070312, "learning_rate": 2.1606022229857525e-07, "loss": 126.7601, "step": 113290 }, { "epoch": 0.9372544153534351, "grad_norm": 768.95166015625, "learning_rate": 2.1565029686999306e-07, "loss": 73.0002, "step": 113300 }, { "epoch": 0.9373371386028043, "grad_norm": 584.2427368164062, "learning_rate": 2.1524075211016014e-07, "loss": 74.3468, "step": 113310 }, { "epoch": 0.9374198618521735, "grad_norm": 1104.49755859375, "learning_rate": 2.148315880516605e-07, "loss": 71.5521, "step": 113320 }, { "epoch": 0.9375025851015428, "grad_norm": 663.4890747070312, "learning_rate": 2.144228047270508e-07, "loss": 82.779, "step": 113330 }, { "epoch": 0.937585308350912, "grad_norm": 746.1346435546875, "learning_rate": 2.140144021688556e-07, "loss": 62.6374, "step": 113340 }, { "epoch": 0.9376680316002812, "grad_norm": 1149.3975830078125, "learning_rate": 2.1360638040957004e-07, "loss": 114.1942, "step": 113350 }, { "epoch": 0.9377507548496505, "grad_norm": 617.9911499023438, "learning_rate": 2.1319873948165704e-07, "loss": 67.7726, "step": 113360 }, { "epoch": 0.9378334780990197, "grad_norm": 4376.74658203125, "learning_rate": 2.1279147941755284e-07, "loss": 123.5628, "step": 113370 }, { "epoch": 0.9379162013483889, "grad_norm": 1298.1807861328125, "learning_rate": 2.123846002496599e-07, "loss": 91.0785, "step": 113380 }, { "epoch": 0.9379989245977582, "grad_norm": 737.0429077148438, "learning_rate": 2.119781020103523e-07, "loss": 78.1234, "step": 113390 }, { "epoch": 0.9380816478471274, "grad_norm": 1079.584716796875, "learning_rate": 2.1157198473197417e-07, "loss": 83.1591, "step": 113400 }, { "epoch": 0.9381643710964966, "grad_norm": 733.552001953125, "learning_rate": 2.111662484468363e-07, "loss": 92.7453, "step": 113410 }, { "epoch": 0.938247094345866, "grad_norm": 328.62188720703125, "learning_rate": 2.1076089318722237e-07, "loss": 84.1476, "step": 113420 }, { "epoch": 0.9383298175952351, "grad_norm": 764.5359497070312, "learning_rate": 2.1035591898538432e-07, "loss": 115.1742, "step": 113430 }, { "epoch": 0.9384125408446043, "grad_norm": 606.0637817382812, "learning_rate": 2.0995132587354416e-07, "loss": 91.7363, "step": 113440 }, { "epoch": 0.9384952640939737, "grad_norm": 631.8139038085938, "learning_rate": 2.0954711388389392e-07, "loss": 70.5418, "step": 113450 }, { "epoch": 0.9385779873433429, "grad_norm": 742.7326049804688, "learning_rate": 2.09143283048594e-07, "loss": 83.6848, "step": 113460 }, { "epoch": 0.938660710592712, "grad_norm": 670.2506103515625, "learning_rate": 2.08739833399777e-07, "loss": 64.4276, "step": 113470 }, { "epoch": 0.9387434338420814, "grad_norm": 569.9981079101562, "learning_rate": 2.0833676496954225e-07, "loss": 73.9018, "step": 113480 }, { "epoch": 0.9388261570914506, "grad_norm": 843.3419189453125, "learning_rate": 2.0793407778996021e-07, "loss": 66.5153, "step": 113490 }, { "epoch": 0.9389088803408198, "grad_norm": 1188.6783447265625, "learning_rate": 2.0753177189307138e-07, "loss": 82.2647, "step": 113500 }, { "epoch": 0.9389916035901891, "grad_norm": 814.5333862304688, "learning_rate": 2.071298473108846e-07, "loss": 97.839, "step": 113510 }, { "epoch": 0.9390743268395583, "grad_norm": 490.64227294921875, "learning_rate": 2.0672830407537925e-07, "loss": 70.4371, "step": 113520 }, { "epoch": 0.9391570500889275, "grad_norm": 819.4855346679688, "learning_rate": 2.0632714221850536e-07, "loss": 79.6486, "step": 113530 }, { "epoch": 0.9392397733382968, "grad_norm": 1092.7366943359375, "learning_rate": 2.0592636177218017e-07, "loss": 80.8918, "step": 113540 }, { "epoch": 0.939322496587666, "grad_norm": 1289.69775390625, "learning_rate": 2.055259627682926e-07, "loss": 89.2574, "step": 113550 }, { "epoch": 0.9394052198370352, "grad_norm": 651.2507934570312, "learning_rate": 2.051259452387e-07, "loss": 77.3755, "step": 113560 }, { "epoch": 0.9394879430864045, "grad_norm": 588.5090942382812, "learning_rate": 2.0472630921523185e-07, "loss": 90.6757, "step": 113570 }, { "epoch": 0.9395706663357737, "grad_norm": 806.9398193359375, "learning_rate": 2.043270547296833e-07, "loss": 84.2983, "step": 113580 }, { "epoch": 0.9396533895851429, "grad_norm": 961.4072265625, "learning_rate": 2.0392818181382168e-07, "loss": 69.7272, "step": 113590 }, { "epoch": 0.9397361128345122, "grad_norm": 765.0098876953125, "learning_rate": 2.0352969049938332e-07, "loss": 92.7051, "step": 113600 }, { "epoch": 0.9398188360838814, "grad_norm": 1120.79833984375, "learning_rate": 2.0313158081807504e-07, "loss": 84.8201, "step": 113610 }, { "epoch": 0.9399015593332506, "grad_norm": 834.6403198242188, "learning_rate": 2.027338528015721e-07, "loss": 77.9415, "step": 113620 }, { "epoch": 0.9399842825826198, "grad_norm": 1245.4927978515625, "learning_rate": 2.0233650648152026e-07, "loss": 106.3016, "step": 113630 }, { "epoch": 0.9400670058319891, "grad_norm": 606.9644165039062, "learning_rate": 2.0193954188953425e-07, "loss": 116.0985, "step": 113640 }, { "epoch": 0.9401497290813583, "grad_norm": 788.4069213867188, "learning_rate": 2.015429590571988e-07, "loss": 84.2174, "step": 113650 }, { "epoch": 0.9402324523307275, "grad_norm": 560.9490356445312, "learning_rate": 2.0114675801606754e-07, "loss": 73.4865, "step": 113660 }, { "epoch": 0.9403151755800968, "grad_norm": 1249.0743408203125, "learning_rate": 2.0075093879766584e-07, "loss": 97.4712, "step": 113670 }, { "epoch": 0.940397898829466, "grad_norm": 814.6300048828125, "learning_rate": 2.003555014334857e-07, "loss": 74.9081, "step": 113680 }, { "epoch": 0.9404806220788352, "grad_norm": 929.8789672851562, "learning_rate": 1.9996044595499142e-07, "loss": 101.6634, "step": 113690 }, { "epoch": 0.9405633453282045, "grad_norm": 495.4698486328125, "learning_rate": 1.9956577239361507e-07, "loss": 79.1617, "step": 113700 }, { "epoch": 0.9406460685775737, "grad_norm": 496.3013916015625, "learning_rate": 1.9917148078075876e-07, "loss": 68.7562, "step": 113710 }, { "epoch": 0.9407287918269429, "grad_norm": 884.4699096679688, "learning_rate": 1.9877757114779517e-07, "loss": 103.0708, "step": 113720 }, { "epoch": 0.9408115150763122, "grad_norm": 874.1282348632812, "learning_rate": 1.983840435260659e-07, "loss": 84.1153, "step": 113730 }, { "epoch": 0.9408942383256814, "grad_norm": 956.4913330078125, "learning_rate": 1.9799089794688197e-07, "loss": 78.011, "step": 113740 }, { "epoch": 0.9409769615750506, "grad_norm": 651.1774291992188, "learning_rate": 1.9759813444152342e-07, "loss": 55.0541, "step": 113750 }, { "epoch": 0.9410596848244199, "grad_norm": 874.5100708007812, "learning_rate": 1.9720575304124135e-07, "loss": 61.1703, "step": 113760 }, { "epoch": 0.9411424080737891, "grad_norm": 646.1497802734375, "learning_rate": 1.9681375377725631e-07, "loss": 53.9226, "step": 113770 }, { "epoch": 0.9412251313231583, "grad_norm": 452.9364318847656, "learning_rate": 1.9642213668075673e-07, "loss": 136.8662, "step": 113780 }, { "epoch": 0.9413078545725276, "grad_norm": 505.2906494140625, "learning_rate": 1.9603090178290207e-07, "loss": 78.0005, "step": 113790 }, { "epoch": 0.9413905778218968, "grad_norm": 842.5142211914062, "learning_rate": 1.9564004911482192e-07, "loss": 59.6014, "step": 113800 }, { "epoch": 0.941473301071266, "grad_norm": 860.8002319335938, "learning_rate": 1.9524957870761364e-07, "loss": 103.618, "step": 113810 }, { "epoch": 0.9415560243206353, "grad_norm": 587.0081176757812, "learning_rate": 1.9485949059234567e-07, "loss": 97.7175, "step": 113820 }, { "epoch": 0.9416387475700045, "grad_norm": 842.8802490234375, "learning_rate": 1.944697848000554e-07, "loss": 126.7808, "step": 113830 }, { "epoch": 0.9417214708193737, "grad_norm": 915.0126342773438, "learning_rate": 1.9408046136174975e-07, "loss": 79.9683, "step": 113840 }, { "epoch": 0.941804194068743, "grad_norm": 609.2335815429688, "learning_rate": 1.9369152030840553e-07, "loss": 92.4454, "step": 113850 }, { "epoch": 0.9418869173181122, "grad_norm": 582.4105224609375, "learning_rate": 1.9330296167096972e-07, "loss": 94.1964, "step": 113860 }, { "epoch": 0.9419696405674814, "grad_norm": 1090.697265625, "learning_rate": 1.9291478548035703e-07, "loss": 78.0568, "step": 113870 }, { "epoch": 0.9420523638168508, "grad_norm": 755.0408325195312, "learning_rate": 1.9252699176745326e-07, "loss": 85.2529, "step": 113880 }, { "epoch": 0.94213508706622, "grad_norm": 657.3673095703125, "learning_rate": 1.9213958056311376e-07, "loss": 65.6693, "step": 113890 }, { "epoch": 0.9422178103155892, "grad_norm": 1077.51904296875, "learning_rate": 1.917525518981622e-07, "loss": 100.6788, "step": 113900 }, { "epoch": 0.9423005335649585, "grad_norm": 780.7044067382812, "learning_rate": 1.91365905803394e-07, "loss": 88.6271, "step": 113910 }, { "epoch": 0.9423832568143277, "grad_norm": 674.3640747070312, "learning_rate": 1.9097964230957112e-07, "loss": 72.1338, "step": 113920 }, { "epoch": 0.9424659800636969, "grad_norm": 830.9854125976562, "learning_rate": 1.9059376144742792e-07, "loss": 82.4124, "step": 113930 }, { "epoch": 0.9425487033130662, "grad_norm": 555.4501342773438, "learning_rate": 1.9020826324766707e-07, "loss": 94.9241, "step": 113940 }, { "epoch": 0.9426314265624354, "grad_norm": 762.3797607421875, "learning_rate": 1.8982314774096067e-07, "loss": 81.5409, "step": 113950 }, { "epoch": 0.9427141498118046, "grad_norm": 840.9845581054688, "learning_rate": 1.894384149579509e-07, "loss": 81.5919, "step": 113960 }, { "epoch": 0.9427968730611739, "grad_norm": 520.0264892578125, "learning_rate": 1.8905406492924884e-07, "loss": 82.138, "step": 113970 }, { "epoch": 0.9428795963105431, "grad_norm": 1191.1163330078125, "learning_rate": 1.8867009768543554e-07, "loss": 92.1979, "step": 113980 }, { "epoch": 0.9429623195599123, "grad_norm": 850.5126342773438, "learning_rate": 1.8828651325706159e-07, "loss": 124.8058, "step": 113990 }, { "epoch": 0.9430450428092816, "grad_norm": 1242.6998291015625, "learning_rate": 1.8790331167464758e-07, "loss": 74.3944, "step": 114000 }, { "epoch": 0.9431277660586508, "grad_norm": 937.7474975585938, "learning_rate": 1.875204929686819e-07, "loss": 78.7968, "step": 114010 }, { "epoch": 0.94321048930802, "grad_norm": 811.911376953125, "learning_rate": 1.8713805716962408e-07, "loss": 81.0114, "step": 114020 }, { "epoch": 0.9432932125573893, "grad_norm": 887.4229736328125, "learning_rate": 1.867560043079031e-07, "loss": 91.9161, "step": 114030 }, { "epoch": 0.9433759358067585, "grad_norm": 556.4613037109375, "learning_rate": 1.8637433441391739e-07, "loss": 81.6592, "step": 114040 }, { "epoch": 0.9434586590561277, "grad_norm": 785.0360107421875, "learning_rate": 1.859930475180338e-07, "loss": 88.5488, "step": 114050 }, { "epoch": 0.943541382305497, "grad_norm": 1435.7166748046875, "learning_rate": 1.8561214365059033e-07, "loss": 75.1003, "step": 114060 }, { "epoch": 0.9436241055548662, "grad_norm": 584.7539672851562, "learning_rate": 1.8523162284189377e-07, "loss": 85.8287, "step": 114070 }, { "epoch": 0.9437068288042354, "grad_norm": 553.2955932617188, "learning_rate": 1.848514851222205e-07, "loss": 86.0015, "step": 114080 }, { "epoch": 0.9437895520536047, "grad_norm": 628.0018310546875, "learning_rate": 1.8447173052181577e-07, "loss": 81.3418, "step": 114090 }, { "epoch": 0.9438722753029739, "grad_norm": 708.7710571289062, "learning_rate": 1.8409235907089484e-07, "loss": 91.275, "step": 114100 }, { "epoch": 0.9439549985523431, "grad_norm": 712.0585327148438, "learning_rate": 1.8371337079964303e-07, "loss": 99.5175, "step": 114110 }, { "epoch": 0.9440377218017124, "grad_norm": 1265.697021484375, "learning_rate": 1.8333476573821395e-07, "loss": 103.0046, "step": 114120 }, { "epoch": 0.9441204450510816, "grad_norm": 937.801025390625, "learning_rate": 1.8295654391673245e-07, "loss": 89.4391, "step": 114130 }, { "epoch": 0.9442031683004508, "grad_norm": 484.9341125488281, "learning_rate": 1.8257870536529167e-07, "loss": 118.8798, "step": 114140 }, { "epoch": 0.9442858915498201, "grad_norm": 604.8045654296875, "learning_rate": 1.8220125011395419e-07, "loss": 78.082, "step": 114150 }, { "epoch": 0.9443686147991893, "grad_norm": 624.2005615234375, "learning_rate": 1.8182417819275266e-07, "loss": 70.6287, "step": 114160 }, { "epoch": 0.9444513380485585, "grad_norm": 622.5794067382812, "learning_rate": 1.8144748963168924e-07, "loss": 80.9847, "step": 114170 }, { "epoch": 0.9445340612979278, "grad_norm": 371.8035888671875, "learning_rate": 1.8107118446073492e-07, "loss": 83.7294, "step": 114180 }, { "epoch": 0.944616784547297, "grad_norm": 911.8489379882812, "learning_rate": 1.806952627098296e-07, "loss": 69.7044, "step": 114190 }, { "epoch": 0.9446995077966662, "grad_norm": 649.6644287109375, "learning_rate": 1.8031972440888556e-07, "loss": 99.0398, "step": 114200 }, { "epoch": 0.9447822310460355, "grad_norm": 1058.6116943359375, "learning_rate": 1.799445695877805e-07, "loss": 105.9399, "step": 114210 }, { "epoch": 0.9448649542954047, "grad_norm": 1170.114013671875, "learning_rate": 1.7956979827636556e-07, "loss": 95.6862, "step": 114220 }, { "epoch": 0.9449476775447739, "grad_norm": 1229.6246337890625, "learning_rate": 1.791954105044591e-07, "loss": 83.8179, "step": 114230 }, { "epoch": 0.9450304007941432, "grad_norm": 673.4191284179688, "learning_rate": 1.788214063018495e-07, "loss": 78.5252, "step": 114240 }, { "epoch": 0.9451131240435124, "grad_norm": 1112.168212890625, "learning_rate": 1.7844778569829412e-07, "loss": 80.5072, "step": 114250 }, { "epoch": 0.9451958472928816, "grad_norm": 1105.3406982421875, "learning_rate": 1.7807454872352137e-07, "loss": 76.1349, "step": 114260 }, { "epoch": 0.945278570542251, "grad_norm": 727.6226806640625, "learning_rate": 1.7770169540722638e-07, "loss": 81.8822, "step": 114270 }, { "epoch": 0.9453612937916201, "grad_norm": 932.0345458984375, "learning_rate": 1.7732922577907595e-07, "loss": 89.935, "step": 114280 }, { "epoch": 0.9454440170409893, "grad_norm": 780.9658813476562, "learning_rate": 1.769571398687059e-07, "loss": 95.7571, "step": 114290 }, { "epoch": 0.9455267402903587, "grad_norm": 883.4459838867188, "learning_rate": 1.765854377057219e-07, "loss": 72.5025, "step": 114300 }, { "epoch": 0.9456094635397279, "grad_norm": 925.8482055664062, "learning_rate": 1.76214119319697e-07, "loss": 86.9446, "step": 114310 }, { "epoch": 0.9456921867890971, "grad_norm": 770.7780151367188, "learning_rate": 1.758431847401776e-07, "loss": 78.9522, "step": 114320 }, { "epoch": 0.9457749100384664, "grad_norm": 670.9049682617188, "learning_rate": 1.7547263399667558e-07, "loss": 84.8401, "step": 114330 }, { "epoch": 0.9458576332878356, "grad_norm": 637.699951171875, "learning_rate": 1.7510246711867572e-07, "loss": 91.2389, "step": 114340 }, { "epoch": 0.9459403565372048, "grad_norm": 908.4950561523438, "learning_rate": 1.7473268413562837e-07, "loss": 92.7157, "step": 114350 }, { "epoch": 0.946023079786574, "grad_norm": 778.84765625, "learning_rate": 1.743632850769561e-07, "loss": 84.7941, "step": 114360 }, { "epoch": 0.9461058030359433, "grad_norm": 945.8580322265625, "learning_rate": 1.739942699720504e-07, "loss": 124.0557, "step": 114370 }, { "epoch": 0.9461885262853125, "grad_norm": 716.9354248046875, "learning_rate": 1.7362563885027272e-07, "loss": 101.1986, "step": 114380 }, { "epoch": 0.9462712495346817, "grad_norm": 920.9649047851562, "learning_rate": 1.7325739174095302e-07, "loss": 72.5662, "step": 114390 }, { "epoch": 0.946353972784051, "grad_norm": 918.943115234375, "learning_rate": 1.728895286733906e-07, "loss": 63.3512, "step": 114400 }, { "epoch": 0.9464366960334202, "grad_norm": 959.2852172851562, "learning_rate": 1.7252204967685427e-07, "loss": 78.725, "step": 114410 }, { "epoch": 0.9465194192827894, "grad_norm": 519.7755126953125, "learning_rate": 1.7215495478058397e-07, "loss": 80.2883, "step": 114420 }, { "epoch": 0.9466021425321587, "grad_norm": 1290.6314697265625, "learning_rate": 1.7178824401378802e-07, "loss": 74.9698, "step": 114430 }, { "epoch": 0.9466848657815279, "grad_norm": 840.70068359375, "learning_rate": 1.7142191740564196e-07, "loss": 72.1562, "step": 114440 }, { "epoch": 0.9467675890308971, "grad_norm": 853.8070068359375, "learning_rate": 1.7105597498529358e-07, "loss": 80.7684, "step": 114450 }, { "epoch": 0.9468503122802664, "grad_norm": 1219.2557373046875, "learning_rate": 1.7069041678186017e-07, "loss": 85.5803, "step": 114460 }, { "epoch": 0.9469330355296356, "grad_norm": 652.1727294921875, "learning_rate": 1.7032524282442618e-07, "loss": 74.4581, "step": 114470 }, { "epoch": 0.9470157587790048, "grad_norm": 469.0629577636719, "learning_rate": 1.6996045314204734e-07, "loss": 71.7418, "step": 114480 }, { "epoch": 0.9470984820283741, "grad_norm": 666.7960815429688, "learning_rate": 1.6959604776374871e-07, "loss": 87.4047, "step": 114490 }, { "epoch": 0.9471812052777433, "grad_norm": 587.7761840820312, "learning_rate": 1.6923202671852379e-07, "loss": 122.6015, "step": 114500 }, { "epoch": 0.9472639285271125, "grad_norm": 1696.044677734375, "learning_rate": 1.688683900353366e-07, "loss": 96.9965, "step": 114510 }, { "epoch": 0.9473466517764818, "grad_norm": 692.3433837890625, "learning_rate": 1.6850513774311906e-07, "loss": 80.0998, "step": 114520 }, { "epoch": 0.947429375025851, "grad_norm": 755.7346801757812, "learning_rate": 1.6814226987077464e-07, "loss": 77.0792, "step": 114530 }, { "epoch": 0.9475120982752202, "grad_norm": 481.3829345703125, "learning_rate": 1.6777978644717474e-07, "loss": 63.8102, "step": 114540 }, { "epoch": 0.9475948215245895, "grad_norm": 654.6975708007812, "learning_rate": 1.6741768750116017e-07, "loss": 56.0552, "step": 114550 }, { "epoch": 0.9476775447739587, "grad_norm": 745.0943603515625, "learning_rate": 1.670559730615412e-07, "loss": 76.7988, "step": 114560 }, { "epoch": 0.9477602680233279, "grad_norm": 604.7420654296875, "learning_rate": 1.6669464315709872e-07, "loss": 75.703, "step": 114570 }, { "epoch": 0.9478429912726972, "grad_norm": 1340.7713623046875, "learning_rate": 1.6633369781658137e-07, "loss": 78.7956, "step": 114580 }, { "epoch": 0.9479257145220664, "grad_norm": 1017.1603393554688, "learning_rate": 1.6597313706870842e-07, "loss": 88.8706, "step": 114590 }, { "epoch": 0.9480084377714356, "grad_norm": 465.18914794921875, "learning_rate": 1.656129609421675e-07, "loss": 90.0202, "step": 114600 }, { "epoch": 0.9480911610208049, "grad_norm": 746.8944091796875, "learning_rate": 1.6525316946561675e-07, "loss": 69.2716, "step": 114610 }, { "epoch": 0.9481738842701741, "grad_norm": 557.8377075195312, "learning_rate": 1.648937626676822e-07, "loss": 73.0987, "step": 114620 }, { "epoch": 0.9482566075195433, "grad_norm": 1114.826171875, "learning_rate": 1.6453474057696152e-07, "loss": 73.0719, "step": 114630 }, { "epoch": 0.9483393307689126, "grad_norm": 1133.66796875, "learning_rate": 1.6417610322201904e-07, "loss": 84.8226, "step": 114640 }, { "epoch": 0.9484220540182818, "grad_norm": 638.8233032226562, "learning_rate": 1.6381785063139144e-07, "loss": 58.4258, "step": 114650 }, { "epoch": 0.948504777267651, "grad_norm": 581.8013916015625, "learning_rate": 1.6345998283358145e-07, "loss": 90.1874, "step": 114660 }, { "epoch": 0.9485875005170203, "grad_norm": 584.7073974609375, "learning_rate": 1.631024998570646e-07, "loss": 108.2388, "step": 114670 }, { "epoch": 0.9486702237663895, "grad_norm": 1126.181884765625, "learning_rate": 1.6274540173028318e-07, "loss": 92.8387, "step": 114680 }, { "epoch": 0.9487529470157587, "grad_norm": 823.510986328125, "learning_rate": 1.6238868848165056e-07, "loss": 86.9546, "step": 114690 }, { "epoch": 0.948835670265128, "grad_norm": 1618.146728515625, "learning_rate": 1.6203236013954792e-07, "loss": 113.7917, "step": 114700 }, { "epoch": 0.9489183935144972, "grad_norm": 6492.607421875, "learning_rate": 1.6167641673232703e-07, "loss": 122.2078, "step": 114710 }, { "epoch": 0.9490011167638664, "grad_norm": 1291.8187255859375, "learning_rate": 1.613208582883091e-07, "loss": 75.8952, "step": 114720 }, { "epoch": 0.9490838400132358, "grad_norm": 712.4785766601562, "learning_rate": 1.609656848357838e-07, "loss": 69.3616, "step": 114730 }, { "epoch": 0.949166563262605, "grad_norm": 1505.3465576171875, "learning_rate": 1.6061089640301063e-07, "loss": 88.8558, "step": 114740 }, { "epoch": 0.9492492865119742, "grad_norm": 827.9591064453125, "learning_rate": 1.6025649301821877e-07, "loss": 93.01, "step": 114750 }, { "epoch": 0.9493320097613435, "grad_norm": 877.7481689453125, "learning_rate": 1.599024747096062e-07, "loss": 93.6232, "step": 114760 }, { "epoch": 0.9494147330107127, "grad_norm": 1036.861083984375, "learning_rate": 1.595488415053409e-07, "loss": 90.6146, "step": 114770 }, { "epoch": 0.9494974562600819, "grad_norm": 577.421875, "learning_rate": 1.591955934335593e-07, "loss": 68.6489, "step": 114780 }, { "epoch": 0.9495801795094512, "grad_norm": 877.7052612304688, "learning_rate": 1.588427305223683e-07, "loss": 94.7773, "step": 114790 }, { "epoch": 0.9496629027588204, "grad_norm": 977.225341796875, "learning_rate": 1.584902527998433e-07, "loss": 80.5525, "step": 114800 }, { "epoch": 0.9497456260081896, "grad_norm": 1442.789794921875, "learning_rate": 1.5813816029402963e-07, "loss": 83.8474, "step": 114810 }, { "epoch": 0.9498283492575589, "grad_norm": 750.039306640625, "learning_rate": 1.5778645303294094e-07, "loss": 82.7841, "step": 114820 }, { "epoch": 0.9499110725069281, "grad_norm": 716.677490234375, "learning_rate": 1.5743513104456154e-07, "loss": 83.8299, "step": 114830 }, { "epoch": 0.9499937957562973, "grad_norm": 688.27685546875, "learning_rate": 1.5708419435684463e-07, "loss": 93.2964, "step": 114840 }, { "epoch": 0.9500765190056666, "grad_norm": 907.2855834960938, "learning_rate": 1.5673364299771177e-07, "loss": 100.0956, "step": 114850 }, { "epoch": 0.9501592422550358, "grad_norm": 1261.3101806640625, "learning_rate": 1.5638347699505673e-07, "loss": 93.222, "step": 114860 }, { "epoch": 0.950241965504405, "grad_norm": 672.2671508789062, "learning_rate": 1.5603369637673727e-07, "loss": 119.3111, "step": 114870 }, { "epoch": 0.9503246887537743, "grad_norm": 998.6853637695312, "learning_rate": 1.5568430117058718e-07, "loss": 69.6595, "step": 114880 }, { "epoch": 0.9504074120031435, "grad_norm": 602.7067260742188, "learning_rate": 1.553352914044043e-07, "loss": 88.5864, "step": 114890 }, { "epoch": 0.9504901352525127, "grad_norm": 914.7874145507812, "learning_rate": 1.5498666710595855e-07, "loss": 66.891, "step": 114900 }, { "epoch": 0.950572858501882, "grad_norm": 843.3950805664062, "learning_rate": 1.5463842830298782e-07, "loss": 80.3389, "step": 114910 }, { "epoch": 0.9506555817512512, "grad_norm": 720.8048706054688, "learning_rate": 1.5429057502320045e-07, "loss": 82.6339, "step": 114920 }, { "epoch": 0.9507383050006204, "grad_norm": 1675.1365966796875, "learning_rate": 1.5394310729427265e-07, "loss": 100.8808, "step": 114930 }, { "epoch": 0.9508210282499897, "grad_norm": 751.2517700195312, "learning_rate": 1.535960251438523e-07, "loss": 80.3977, "step": 114940 }, { "epoch": 0.9509037514993589, "grad_norm": 557.0956420898438, "learning_rate": 1.53249328599554e-07, "loss": 75.274, "step": 114950 }, { "epoch": 0.9509864747487281, "grad_norm": 1328.832275390625, "learning_rate": 1.5290301768896287e-07, "loss": 63.9589, "step": 114960 }, { "epoch": 0.9510691979980974, "grad_norm": 811.1527709960938, "learning_rate": 1.5255709243963246e-07, "loss": 86.3641, "step": 114970 }, { "epoch": 0.9511519212474666, "grad_norm": 1016.0829467773438, "learning_rate": 1.5221155287908851e-07, "loss": 80.3929, "step": 114980 }, { "epoch": 0.9512346444968358, "grad_norm": 1659.81787109375, "learning_rate": 1.518663990348229e-07, "loss": 106.4811, "step": 114990 }, { "epoch": 0.9513173677462051, "grad_norm": 682.6334838867188, "learning_rate": 1.5152163093429762e-07, "loss": 82.3017, "step": 115000 }, { "epoch": 0.9514000909955743, "grad_norm": 1205.90576171875, "learning_rate": 1.5117724860494509e-07, "loss": 84.5935, "step": 115010 }, { "epoch": 0.9514828142449435, "grad_norm": 691.462890625, "learning_rate": 1.5083325207416565e-07, "loss": 72.7825, "step": 115020 }, { "epoch": 0.9515655374943128, "grad_norm": 810.3004150390625, "learning_rate": 1.504896413693302e-07, "loss": 85.171, "step": 115030 }, { "epoch": 0.951648260743682, "grad_norm": 750.6644897460938, "learning_rate": 1.501464165177774e-07, "loss": 94.0332, "step": 115040 }, { "epoch": 0.9517309839930512, "grad_norm": 463.6679992675781, "learning_rate": 1.4980357754681595e-07, "loss": 81.951, "step": 115050 }, { "epoch": 0.9518137072424205, "grad_norm": 824.5008544921875, "learning_rate": 1.4946112448372462e-07, "loss": 96.1971, "step": 115060 }, { "epoch": 0.9518964304917897, "grad_norm": 629.86572265625, "learning_rate": 1.491190573557505e-07, "loss": 87.0432, "step": 115070 }, { "epoch": 0.9519791537411589, "grad_norm": 1388.0849609375, "learning_rate": 1.4877737619011067e-07, "loss": 94.4698, "step": 115080 }, { "epoch": 0.9520618769905281, "grad_norm": 826.2018432617188, "learning_rate": 1.4843608101399065e-07, "loss": 83.7667, "step": 115090 }, { "epoch": 0.9521446002398974, "grad_norm": 675.1743774414062, "learning_rate": 1.4809517185454646e-07, "loss": 67.9461, "step": 115100 }, { "epoch": 0.9522273234892666, "grad_norm": 530.15380859375, "learning_rate": 1.4775464873890256e-07, "loss": 54.279, "step": 115110 }, { "epoch": 0.9523100467386358, "grad_norm": 862.5327758789062, "learning_rate": 1.4741451169415165e-07, "loss": 95.6716, "step": 115120 }, { "epoch": 0.9523927699880052, "grad_norm": 1480.7520751953125, "learning_rate": 1.4707476074735772e-07, "loss": 112.3083, "step": 115130 }, { "epoch": 0.9524754932373743, "grad_norm": 440.8155517578125, "learning_rate": 1.4673539592555354e-07, "loss": 67.7535, "step": 115140 }, { "epoch": 0.9525582164867435, "grad_norm": 760.7919311523438, "learning_rate": 1.4639641725574028e-07, "loss": 81.5349, "step": 115150 }, { "epoch": 0.9526409397361129, "grad_norm": 711.9234008789062, "learning_rate": 1.460578247648886e-07, "loss": 81.3426, "step": 115160 }, { "epoch": 0.9527236629854821, "grad_norm": 728.8473510742188, "learning_rate": 1.4571961847993977e-07, "loss": 85.5738, "step": 115170 }, { "epoch": 0.9528063862348513, "grad_norm": 426.4129943847656, "learning_rate": 1.453817984278022e-07, "loss": 90.5035, "step": 115180 }, { "epoch": 0.9528891094842206, "grad_norm": 623.3121337890625, "learning_rate": 1.450443646353561e-07, "loss": 79.0887, "step": 115190 }, { "epoch": 0.9529718327335898, "grad_norm": 748.9600219726562, "learning_rate": 1.4470731712944885e-07, "loss": 110.1946, "step": 115200 }, { "epoch": 0.953054555982959, "grad_norm": 567.2506713867188, "learning_rate": 1.443706559368968e-07, "loss": 76.4982, "step": 115210 }, { "epoch": 0.9531372792323283, "grad_norm": 406.186279296875, "learning_rate": 1.4403438108448742e-07, "loss": 120.5117, "step": 115220 }, { "epoch": 0.9532200024816975, "grad_norm": 800.6393432617188, "learning_rate": 1.436984925989765e-07, "loss": 77.8392, "step": 115230 }, { "epoch": 0.9533027257310667, "grad_norm": 632.5950927734375, "learning_rate": 1.4336299050708935e-07, "loss": 69.4117, "step": 115240 }, { "epoch": 0.953385448980436, "grad_norm": 855.23974609375, "learning_rate": 1.4302787483551962e-07, "loss": 80.8834, "step": 115250 }, { "epoch": 0.9534681722298052, "grad_norm": 749.250732421875, "learning_rate": 1.426931456109315e-07, "loss": 87.1514, "step": 115260 }, { "epoch": 0.9535508954791744, "grad_norm": 503.9537658691406, "learning_rate": 1.4235880285995762e-07, "loss": 58.2232, "step": 115270 }, { "epoch": 0.9536336187285437, "grad_norm": 932.1503295898438, "learning_rate": 1.4202484660920057e-07, "loss": 87.4663, "step": 115280 }, { "epoch": 0.9537163419779129, "grad_norm": 973.0960083007812, "learning_rate": 1.4169127688523187e-07, "loss": 86.1386, "step": 115290 }, { "epoch": 0.9537990652272821, "grad_norm": 702.0614013671875, "learning_rate": 1.413580937145914e-07, "loss": 77.9156, "step": 115300 }, { "epoch": 0.9538817884766514, "grad_norm": 870.7654418945312, "learning_rate": 1.410252971237891e-07, "loss": 92.6622, "step": 115310 }, { "epoch": 0.9539645117260206, "grad_norm": 1024.184326171875, "learning_rate": 1.406928871393043e-07, "loss": 81.5768, "step": 115320 }, { "epoch": 0.9540472349753898, "grad_norm": 1577.88525390625, "learning_rate": 1.4036086378758474e-07, "loss": 82.3757, "step": 115330 }, { "epoch": 0.9541299582247591, "grad_norm": 652.8252563476562, "learning_rate": 1.4002922709504874e-07, "loss": 72.8365, "step": 115340 }, { "epoch": 0.9542126814741283, "grad_norm": 1175.390625, "learning_rate": 1.3969797708808296e-07, "loss": 84.131, "step": 115350 }, { "epoch": 0.9542954047234975, "grad_norm": 1300.7655029296875, "learning_rate": 1.39367113793043e-07, "loss": 82.6223, "step": 115360 }, { "epoch": 0.9543781279728668, "grad_norm": 706.9264526367188, "learning_rate": 1.390366372362556e-07, "loss": 95.0086, "step": 115370 }, { "epoch": 0.954460851222236, "grad_norm": 700.4502563476562, "learning_rate": 1.3870654744401358e-07, "loss": 83.8916, "step": 115380 }, { "epoch": 0.9545435744716052, "grad_norm": 775.755859375, "learning_rate": 1.3837684444258092e-07, "loss": 76.1233, "step": 115390 }, { "epoch": 0.9546262977209745, "grad_norm": 778.3601684570312, "learning_rate": 1.3804752825819113e-07, "loss": 90.3711, "step": 115400 }, { "epoch": 0.9547090209703437, "grad_norm": 1078.059326171875, "learning_rate": 1.3771859891704653e-07, "loss": 66.5463, "step": 115410 }, { "epoch": 0.9547917442197129, "grad_norm": 954.205322265625, "learning_rate": 1.373900564453179e-07, "loss": 80.5711, "step": 115420 }, { "epoch": 0.9548744674690822, "grad_norm": 1166.7353515625, "learning_rate": 1.3706190086914595e-07, "loss": 74.4393, "step": 115430 }, { "epoch": 0.9549571907184514, "grad_norm": 567.34423828125, "learning_rate": 1.3673413221464039e-07, "loss": 75.5963, "step": 115440 }, { "epoch": 0.9550399139678206, "grad_norm": 1016.5427856445312, "learning_rate": 1.3640675050788088e-07, "loss": 82.2038, "step": 115450 }, { "epoch": 0.9551226372171899, "grad_norm": 744.4939575195312, "learning_rate": 1.360797557749155e-07, "loss": 81.3256, "step": 115460 }, { "epoch": 0.9552053604665591, "grad_norm": 757.7814331054688, "learning_rate": 1.3575314804176176e-07, "loss": 68.7753, "step": 115470 }, { "epoch": 0.9552880837159283, "grad_norm": 502.91387939453125, "learning_rate": 1.3542692733440555e-07, "loss": 86.8093, "step": 115480 }, { "epoch": 0.9553708069652976, "grad_norm": 464.1839904785156, "learning_rate": 1.3510109367880387e-07, "loss": 91.0751, "step": 115490 }, { "epoch": 0.9554535302146668, "grad_norm": 1076.8289794921875, "learning_rate": 1.3477564710088097e-07, "loss": 95.7027, "step": 115500 }, { "epoch": 0.955536253464036, "grad_norm": 701.7645874023438, "learning_rate": 1.3445058762653174e-07, "loss": 91.7253, "step": 115510 }, { "epoch": 0.9556189767134053, "grad_norm": 1526.50439453125, "learning_rate": 1.3412591528161935e-07, "loss": 109.7217, "step": 115520 }, { "epoch": 0.9557016999627745, "grad_norm": 1529.469482421875, "learning_rate": 1.338016300919759e-07, "loss": 80.6137, "step": 115530 }, { "epoch": 0.9557844232121437, "grad_norm": 649.20068359375, "learning_rate": 1.3347773208340464e-07, "loss": 82.6933, "step": 115540 }, { "epoch": 0.955867146461513, "grad_norm": 1042.389404296875, "learning_rate": 1.3315422128167555e-07, "loss": 79.4586, "step": 115550 }, { "epoch": 0.9559498697108822, "grad_norm": 921.5702514648438, "learning_rate": 1.3283109771252966e-07, "loss": 88.0491, "step": 115560 }, { "epoch": 0.9560325929602514, "grad_norm": 533.2924194335938, "learning_rate": 1.3250836140167588e-07, "loss": 63.4962, "step": 115570 }, { "epoch": 0.9561153162096208, "grad_norm": 578.7036743164062, "learning_rate": 1.3218601237479255e-07, "loss": 78.6079, "step": 115580 }, { "epoch": 0.95619803945899, "grad_norm": 1517.4697265625, "learning_rate": 1.3186405065752861e-07, "loss": 79.1212, "step": 115590 }, { "epoch": 0.9562807627083592, "grad_norm": 694.7293090820312, "learning_rate": 1.315424762755002e-07, "loss": 75.5039, "step": 115600 }, { "epoch": 0.9563634859577285, "grad_norm": 866.1787719726562, "learning_rate": 1.3122128925429356e-07, "loss": 104.6923, "step": 115610 }, { "epoch": 0.9564462092070977, "grad_norm": 821.7919921875, "learning_rate": 1.3090048961946433e-07, "loss": 111.5836, "step": 115620 }, { "epoch": 0.9565289324564669, "grad_norm": 1143.0037841796875, "learning_rate": 1.305800773965371e-07, "loss": 107.7157, "step": 115630 }, { "epoch": 0.9566116557058362, "grad_norm": 874.7697143554688, "learning_rate": 1.3026005261100537e-07, "loss": 58.3183, "step": 115640 }, { "epoch": 0.9566943789552054, "grad_norm": 968.6356201171875, "learning_rate": 1.2994041528833267e-07, "loss": 73.539, "step": 115650 }, { "epoch": 0.9567771022045746, "grad_norm": 657.7074584960938, "learning_rate": 1.2962116545394977e-07, "loss": 88.1406, "step": 115660 }, { "epoch": 0.9568598254539439, "grad_norm": 964.78662109375, "learning_rate": 1.2930230313325908e-07, "loss": 92.0967, "step": 115670 }, { "epoch": 0.9569425487033131, "grad_norm": 537.0193481445312, "learning_rate": 1.2898382835163093e-07, "loss": 71.7896, "step": 115680 }, { "epoch": 0.9570252719526823, "grad_norm": 1111.5057373046875, "learning_rate": 1.2866574113440444e-07, "loss": 86.3393, "step": 115690 }, { "epoch": 0.9571079952020516, "grad_norm": 862.1448364257812, "learning_rate": 1.2834804150688828e-07, "loss": 85.4919, "step": 115700 }, { "epoch": 0.9571907184514208, "grad_norm": 816.7103271484375, "learning_rate": 1.2803072949436058e-07, "loss": 83.843, "step": 115710 }, { "epoch": 0.95727344170079, "grad_norm": 776.513671875, "learning_rate": 1.277138051220689e-07, "loss": 98.6719, "step": 115720 }, { "epoch": 0.9573561649501593, "grad_norm": 596.2342529296875, "learning_rate": 1.2739726841522858e-07, "loss": 82.0256, "step": 115730 }, { "epoch": 0.9574388881995285, "grad_norm": 825.4178466796875, "learning_rate": 1.270811193990257e-07, "loss": 86.8246, "step": 115740 }, { "epoch": 0.9575216114488977, "grad_norm": 740.2637329101562, "learning_rate": 1.267653580986139e-07, "loss": 74.5113, "step": 115750 }, { "epoch": 0.957604334698267, "grad_norm": 432.4769592285156, "learning_rate": 1.2644998453911762e-07, "loss": 73.2089, "step": 115760 }, { "epoch": 0.9576870579476362, "grad_norm": 2192.880126953125, "learning_rate": 1.2613499874563006e-07, "loss": 113.4138, "step": 115770 }, { "epoch": 0.9577697811970054, "grad_norm": 771.948486328125, "learning_rate": 1.2582040074321177e-07, "loss": 85.1038, "step": 115780 }, { "epoch": 0.9578525044463747, "grad_norm": 656.0718383789062, "learning_rate": 1.255061905568955e-07, "loss": 97.9296, "step": 115790 }, { "epoch": 0.9579352276957439, "grad_norm": 875.1376953125, "learning_rate": 1.251923682116807e-07, "loss": 76.1461, "step": 115800 }, { "epoch": 0.9580179509451131, "grad_norm": 622.1165771484375, "learning_rate": 1.248789337325368e-07, "loss": 81.3456, "step": 115810 }, { "epoch": 0.9581006741944823, "grad_norm": 1635.0670166015625, "learning_rate": 1.2456588714440167e-07, "loss": 106.8132, "step": 115820 }, { "epoch": 0.9581833974438516, "grad_norm": 610.9708251953125, "learning_rate": 1.2425322847218368e-07, "loss": 83.99, "step": 115830 }, { "epoch": 0.9582661206932208, "grad_norm": 882.1947631835938, "learning_rate": 1.239409577407602e-07, "loss": 74.6342, "step": 115840 }, { "epoch": 0.95834884394259, "grad_norm": 1295.30126953125, "learning_rate": 1.2362907497497633e-07, "loss": 88.346, "step": 115850 }, { "epoch": 0.9584315671919593, "grad_norm": 1216.3671875, "learning_rate": 1.233175801996478e-07, "loss": 95.8267, "step": 115860 }, { "epoch": 0.9585142904413285, "grad_norm": 1159.141845703125, "learning_rate": 1.2300647343955807e-07, "loss": 98.5586, "step": 115870 }, { "epoch": 0.9585970136906977, "grad_norm": 1170.9654541015625, "learning_rate": 1.2269575471946127e-07, "loss": 77.9647, "step": 115880 }, { "epoch": 0.958679736940067, "grad_norm": 526.090087890625, "learning_rate": 1.2238542406407984e-07, "loss": 86.4296, "step": 115890 }, { "epoch": 0.9587624601894362, "grad_norm": 970.7127685546875, "learning_rate": 1.22075481498104e-07, "loss": 70.3665, "step": 115900 }, { "epoch": 0.9588451834388054, "grad_norm": 747.5088500976562, "learning_rate": 1.2176592704619628e-07, "loss": 67.7107, "step": 115910 }, { "epoch": 0.9589279066881747, "grad_norm": 1059.5880126953125, "learning_rate": 1.2145676073298473e-07, "loss": 61.7566, "step": 115920 }, { "epoch": 0.9590106299375439, "grad_norm": 982.748046875, "learning_rate": 1.211479825830697e-07, "loss": 106.074, "step": 115930 }, { "epoch": 0.9590933531869131, "grad_norm": 676.8903198242188, "learning_rate": 1.2083959262101874e-07, "loss": 96.3958, "step": 115940 }, { "epoch": 0.9591760764362824, "grad_norm": 1083.4862060546875, "learning_rate": 1.205315908713689e-07, "loss": 84.409, "step": 115950 }, { "epoch": 0.9592587996856516, "grad_norm": 1195.280517578125, "learning_rate": 1.2022397735862724e-07, "loss": 94.2063, "step": 115960 }, { "epoch": 0.9593415229350208, "grad_norm": 891.7564086914062, "learning_rate": 1.199167521072686e-07, "loss": 121.5456, "step": 115970 }, { "epoch": 0.9594242461843902, "grad_norm": 679.1651611328125, "learning_rate": 1.196099151417368e-07, "loss": 70.1369, "step": 115980 }, { "epoch": 0.9595069694337593, "grad_norm": 1112.198486328125, "learning_rate": 1.1930346648644675e-07, "loss": 92.8188, "step": 115990 }, { "epoch": 0.9595896926831285, "grad_norm": 540.4926147460938, "learning_rate": 1.1899740616578004e-07, "loss": 93.057, "step": 116000 }, { "epoch": 0.9596724159324979, "grad_norm": 913.5923461914062, "learning_rate": 1.1869173420408886e-07, "loss": 83.6311, "step": 116010 }, { "epoch": 0.9597551391818671, "grad_norm": 704.4148559570312, "learning_rate": 1.1838645062569377e-07, "loss": 82.1583, "step": 116020 }, { "epoch": 0.9598378624312363, "grad_norm": 983.6731567382812, "learning_rate": 1.1808155545488586e-07, "loss": 71.0212, "step": 116030 }, { "epoch": 0.9599205856806056, "grad_norm": 1065.49072265625, "learning_rate": 1.1777704871592355e-07, "loss": 97.0757, "step": 116040 }, { "epoch": 0.9600033089299748, "grad_norm": 2115.354248046875, "learning_rate": 1.174729304330352e-07, "loss": 83.9105, "step": 116050 }, { "epoch": 0.960086032179344, "grad_norm": 895.270751953125, "learning_rate": 1.1716920063041815e-07, "loss": 94.6967, "step": 116060 }, { "epoch": 0.9601687554287133, "grad_norm": 776.3504638671875, "learning_rate": 1.168658593322386e-07, "loss": 75.9079, "step": 116070 }, { "epoch": 0.9602514786780825, "grad_norm": 933.9031372070312, "learning_rate": 1.165629065626317e-07, "loss": 89.017, "step": 116080 }, { "epoch": 0.9603342019274517, "grad_norm": 984.0797119140625, "learning_rate": 1.1626034234570261e-07, "loss": 112.102, "step": 116090 }, { "epoch": 0.960416925176821, "grad_norm": 813.5068969726562, "learning_rate": 1.1595816670552429e-07, "loss": 51.8942, "step": 116100 }, { "epoch": 0.9604996484261902, "grad_norm": 717.0478515625, "learning_rate": 1.1565637966613974e-07, "loss": 70.7065, "step": 116110 }, { "epoch": 0.9605823716755594, "grad_norm": 879.2958984375, "learning_rate": 1.1535498125156197e-07, "loss": 81.3121, "step": 116120 }, { "epoch": 0.9606650949249287, "grad_norm": 1321.8685302734375, "learning_rate": 1.1505397148577013e-07, "loss": 52.1673, "step": 116130 }, { "epoch": 0.9607478181742979, "grad_norm": 987.5189208984375, "learning_rate": 1.1475335039271507e-07, "loss": 70.81, "step": 116140 }, { "epoch": 0.9608305414236671, "grad_norm": 899.2925415039062, "learning_rate": 1.1445311799631598e-07, "loss": 92.3291, "step": 116150 }, { "epoch": 0.9609132646730364, "grad_norm": 756.8441772460938, "learning_rate": 1.1415327432046041e-07, "loss": 112.0039, "step": 116160 }, { "epoch": 0.9609959879224056, "grad_norm": 635.4735107421875, "learning_rate": 1.1385381938900597e-07, "loss": 71.928, "step": 116170 }, { "epoch": 0.9610787111717748, "grad_norm": 4364.9423828125, "learning_rate": 1.1355475322577858e-07, "loss": 115.5651, "step": 116180 }, { "epoch": 0.9611614344211441, "grad_norm": 619.9732055664062, "learning_rate": 1.1325607585457366e-07, "loss": 106.7126, "step": 116190 }, { "epoch": 0.9612441576705133, "grad_norm": 897.743408203125, "learning_rate": 1.1295778729915551e-07, "loss": 70.6706, "step": 116200 }, { "epoch": 0.9613268809198825, "grad_norm": 1066.9715576171875, "learning_rate": 1.1265988758325742e-07, "loss": 81.003, "step": 116210 }, { "epoch": 0.9614096041692518, "grad_norm": 805.7496337890625, "learning_rate": 1.1236237673058315e-07, "loss": 107.8749, "step": 116220 }, { "epoch": 0.961492327418621, "grad_norm": 593.794677734375, "learning_rate": 1.1206525476480323e-07, "loss": 103.9387, "step": 116230 }, { "epoch": 0.9615750506679902, "grad_norm": 1031.0078125, "learning_rate": 1.1176852170955821e-07, "loss": 81.512, "step": 116240 }, { "epoch": 0.9616577739173595, "grad_norm": 388.48583984375, "learning_rate": 1.1147217758845752e-07, "loss": 71.2489, "step": 116250 }, { "epoch": 0.9617404971667287, "grad_norm": 1047.6055908203125, "learning_rate": 1.1117622242508064e-07, "loss": 84.0228, "step": 116260 }, { "epoch": 0.9618232204160979, "grad_norm": 386.8448486328125, "learning_rate": 1.1088065624297484e-07, "loss": 51.314, "step": 116270 }, { "epoch": 0.9619059436654672, "grad_norm": 1203.3616943359375, "learning_rate": 1.1058547906565743e-07, "loss": 67.1805, "step": 116280 }, { "epoch": 0.9619886669148364, "grad_norm": 992.1272583007812, "learning_rate": 1.1029069091661459e-07, "loss": 103.1852, "step": 116290 }, { "epoch": 0.9620713901642056, "grad_norm": 621.4592895507812, "learning_rate": 1.0999629181929983e-07, "loss": 73.4462, "step": 116300 }, { "epoch": 0.9621541134135749, "grad_norm": 1026.2972412109375, "learning_rate": 1.0970228179713827e-07, "loss": 73.2935, "step": 116310 }, { "epoch": 0.9622368366629441, "grad_norm": 725.5745239257812, "learning_rate": 1.0940866087352287e-07, "loss": 76.0764, "step": 116320 }, { "epoch": 0.9623195599123133, "grad_norm": 780.3145141601562, "learning_rate": 1.0911542907181605e-07, "loss": 65.7862, "step": 116330 }, { "epoch": 0.9624022831616826, "grad_norm": 584.7391357421875, "learning_rate": 1.0882258641534749e-07, "loss": 66.0809, "step": 116340 }, { "epoch": 0.9624850064110518, "grad_norm": 1102.67919921875, "learning_rate": 1.0853013292741854e-07, "loss": 74.0244, "step": 116350 }, { "epoch": 0.962567729660421, "grad_norm": 282.3551940917969, "learning_rate": 1.0823806863129838e-07, "loss": 74.1816, "step": 116360 }, { "epoch": 0.9626504529097903, "grad_norm": 876.3226928710938, "learning_rate": 1.0794639355022507e-07, "loss": 88.0445, "step": 116370 }, { "epoch": 0.9627331761591595, "grad_norm": 719.6063842773438, "learning_rate": 1.0765510770740506e-07, "loss": 75.5096, "step": 116380 }, { "epoch": 0.9628158994085287, "grad_norm": 1058.7418212890625, "learning_rate": 1.0736421112601592e-07, "loss": 88.4987, "step": 116390 }, { "epoch": 0.962898622657898, "grad_norm": 544.888671875, "learning_rate": 1.070737038292019e-07, "loss": 86.7714, "step": 116400 }, { "epoch": 0.9629813459072672, "grad_norm": 1347.0355224609375, "learning_rate": 1.0678358584007787e-07, "loss": 100.632, "step": 116410 }, { "epoch": 0.9630640691566364, "grad_norm": 557.1162719726562, "learning_rate": 1.0649385718172756e-07, "loss": 98.0301, "step": 116420 }, { "epoch": 0.9631467924060058, "grad_norm": 601.667236328125, "learning_rate": 1.0620451787720254e-07, "loss": 55.4105, "step": 116430 }, { "epoch": 0.963229515655375, "grad_norm": 627.3742065429688, "learning_rate": 1.059155679495244e-07, "loss": 89.8003, "step": 116440 }, { "epoch": 0.9633122389047442, "grad_norm": 612.9856567382812, "learning_rate": 1.0562700742168364e-07, "loss": 79.46, "step": 116450 }, { "epoch": 0.9633949621541135, "grad_norm": 969.4473876953125, "learning_rate": 1.0533883631663966e-07, "loss": 105.5287, "step": 116460 }, { "epoch": 0.9634776854034827, "grad_norm": 1106.258544921875, "learning_rate": 1.0505105465732135e-07, "loss": 85.9468, "step": 116470 }, { "epoch": 0.9635604086528519, "grad_norm": 1332.35546875, "learning_rate": 1.0476366246662595e-07, "loss": 84.5516, "step": 116480 }, { "epoch": 0.9636431319022212, "grad_norm": 922.864013671875, "learning_rate": 1.044766597674196e-07, "loss": 80.9004, "step": 116490 }, { "epoch": 0.9637258551515904, "grad_norm": 458.0718688964844, "learning_rate": 1.0419004658253795e-07, "loss": 81.3689, "step": 116500 }, { "epoch": 0.9638085784009596, "grad_norm": 821.3236083984375, "learning_rate": 1.0390382293478551e-07, "loss": 77.9656, "step": 116510 }, { "epoch": 0.9638913016503289, "grad_norm": 836.4533081054688, "learning_rate": 1.036179888469363e-07, "loss": 127.9579, "step": 116520 }, { "epoch": 0.9639740248996981, "grad_norm": 1029.4652099609375, "learning_rate": 1.0333254434173212e-07, "loss": 84.6967, "step": 116530 }, { "epoch": 0.9640567481490673, "grad_norm": 869.0244750976562, "learning_rate": 1.0304748944188425e-07, "loss": 76.1726, "step": 116540 }, { "epoch": 0.9641394713984365, "grad_norm": 824.9259033203125, "learning_rate": 1.0276282417007399e-07, "loss": 85.9181, "step": 116550 }, { "epoch": 0.9642221946478058, "grad_norm": 630.62109375, "learning_rate": 1.02478548548951e-07, "loss": 92.7414, "step": 116560 }, { "epoch": 0.964304917897175, "grad_norm": 1001.7970581054688, "learning_rate": 1.0219466260113276e-07, "loss": 93.9342, "step": 116570 }, { "epoch": 0.9643876411465442, "grad_norm": 711.27392578125, "learning_rate": 1.0191116634920728e-07, "loss": 91.1572, "step": 116580 }, { "epoch": 0.9644703643959135, "grad_norm": 980.2251586914062, "learning_rate": 1.0162805981573154e-07, "loss": 112.6816, "step": 116590 }, { "epoch": 0.9645530876452827, "grad_norm": 983.453857421875, "learning_rate": 1.0134534302323029e-07, "loss": 99.6782, "step": 116600 }, { "epoch": 0.9646358108946519, "grad_norm": 362.87335205078125, "learning_rate": 1.0106301599419832e-07, "loss": 80.6802, "step": 116610 }, { "epoch": 0.9647185341440212, "grad_norm": 757.7172241210938, "learning_rate": 1.0078107875109878e-07, "loss": 75.8858, "step": 116620 }, { "epoch": 0.9648012573933904, "grad_norm": 863.5123901367188, "learning_rate": 1.0049953131636481e-07, "loss": 93.2976, "step": 116630 }, { "epoch": 0.9648839806427596, "grad_norm": 642.6458740234375, "learning_rate": 1.002183737123974e-07, "loss": 81.1781, "step": 116640 }, { "epoch": 0.9649667038921289, "grad_norm": 626.2711791992188, "learning_rate": 9.993760596156698e-08, "loss": 75.3195, "step": 116650 }, { "epoch": 0.9650494271414981, "grad_norm": 792.793212890625, "learning_rate": 9.965722808621403e-08, "loss": 73.6435, "step": 116660 }, { "epoch": 0.9651321503908673, "grad_norm": 803.5825805664062, "learning_rate": 9.937724010864402e-08, "loss": 71.446, "step": 116670 }, { "epoch": 0.9652148736402366, "grad_norm": 952.573486328125, "learning_rate": 9.909764205113747e-08, "loss": 82.1058, "step": 116680 }, { "epoch": 0.9652975968896058, "grad_norm": 1239.050048828125, "learning_rate": 9.881843393593882e-08, "loss": 83.7428, "step": 116690 }, { "epoch": 0.965380320138975, "grad_norm": 1195.249267578125, "learning_rate": 9.853961578526417e-08, "loss": 87.2989, "step": 116700 }, { "epoch": 0.9654630433883443, "grad_norm": 518.0953369140625, "learning_rate": 9.826118762129799e-08, "loss": 81.8561, "step": 116710 }, { "epoch": 0.9655457666377135, "grad_norm": 741.3461303710938, "learning_rate": 9.798314946619258e-08, "loss": 93.3796, "step": 116720 }, { "epoch": 0.9656284898870827, "grad_norm": 578.9274291992188, "learning_rate": 9.770550134207135e-08, "loss": 83.2529, "step": 116730 }, { "epoch": 0.965711213136452, "grad_norm": 700.8192138671875, "learning_rate": 9.74282432710244e-08, "loss": 67.5045, "step": 116740 }, { "epoch": 0.9657939363858212, "grad_norm": 965.154541015625, "learning_rate": 9.715137527511298e-08, "loss": 94.1983, "step": 116750 }, { "epoch": 0.9658766596351904, "grad_norm": 2566.7998046875, "learning_rate": 9.687489737636502e-08, "loss": 91.5708, "step": 116760 }, { "epoch": 0.9659593828845597, "grad_norm": 775.1097412109375, "learning_rate": 9.659880959677903e-08, "loss": 90.2111, "step": 116770 }, { "epoch": 0.9660421061339289, "grad_norm": 1181.168212890625, "learning_rate": 9.632311195832245e-08, "loss": 104.988, "step": 116780 }, { "epoch": 0.9661248293832981, "grad_norm": 812.8969116210938, "learning_rate": 9.604780448293105e-08, "loss": 74.2418, "step": 116790 }, { "epoch": 0.9662075526326674, "grad_norm": 762.244384765625, "learning_rate": 9.57728871925101e-08, "loss": 75.0032, "step": 116800 }, { "epoch": 0.9662902758820366, "grad_norm": 1320.201171875, "learning_rate": 9.549836010893265e-08, "loss": 107.8506, "step": 116810 }, { "epoch": 0.9663729991314058, "grad_norm": 821.8568725585938, "learning_rate": 9.522422325404234e-08, "loss": 91.6939, "step": 116820 }, { "epoch": 0.9664557223807752, "grad_norm": 1307.53466796875, "learning_rate": 9.495047664965063e-08, "loss": 97.0378, "step": 116830 }, { "epoch": 0.9665384456301443, "grad_norm": 891.2249145507812, "learning_rate": 9.467712031753839e-08, "loss": 80.264, "step": 116840 }, { "epoch": 0.9666211688795135, "grad_norm": 5029.44189453125, "learning_rate": 9.440415427945548e-08, "loss": 183.4319, "step": 116850 }, { "epoch": 0.9667038921288829, "grad_norm": 1075.81103515625, "learning_rate": 9.413157855712007e-08, "loss": 81.115, "step": 116860 }, { "epoch": 0.9667866153782521, "grad_norm": 3139.591796875, "learning_rate": 9.385939317221926e-08, "loss": 154.7868, "step": 116870 }, { "epoch": 0.9668693386276213, "grad_norm": 621.9048461914062, "learning_rate": 9.358759814641127e-08, "loss": 72.0611, "step": 116880 }, { "epoch": 0.9669520618769906, "grad_norm": 1159.2205810546875, "learning_rate": 9.331619350132049e-08, "loss": 98.6789, "step": 116890 }, { "epoch": 0.9670347851263598, "grad_norm": 1112.920654296875, "learning_rate": 9.304517925854184e-08, "loss": 83.0609, "step": 116900 }, { "epoch": 0.967117508375729, "grad_norm": 585.3701171875, "learning_rate": 9.277455543963809e-08, "loss": 100.5077, "step": 116910 }, { "epoch": 0.9672002316250983, "grad_norm": 488.8531494140625, "learning_rate": 9.250432206614258e-08, "loss": 84.3317, "step": 116920 }, { "epoch": 0.9672829548744675, "grad_norm": 479.1993103027344, "learning_rate": 9.22344791595553e-08, "loss": 71.9188, "step": 116930 }, { "epoch": 0.9673656781238367, "grad_norm": 1235.1121826171875, "learning_rate": 9.196502674134689e-08, "loss": 94.2771, "step": 116940 }, { "epoch": 0.967448401373206, "grad_norm": 991.1707153320312, "learning_rate": 9.169596483295628e-08, "loss": 105.2712, "step": 116950 }, { "epoch": 0.9675311246225752, "grad_norm": 1200.013427734375, "learning_rate": 9.142729345579193e-08, "loss": 92.9962, "step": 116960 }, { "epoch": 0.9676138478719444, "grad_norm": 757.19580078125, "learning_rate": 9.115901263123006e-08, "loss": 83.3054, "step": 116970 }, { "epoch": 0.9676965711213137, "grad_norm": 1006.8023071289062, "learning_rate": 9.089112238061692e-08, "loss": 100.9199, "step": 116980 }, { "epoch": 0.9677792943706829, "grad_norm": 668.6526489257812, "learning_rate": 9.062362272526825e-08, "loss": 91.3445, "step": 116990 }, { "epoch": 0.9678620176200521, "grad_norm": 1176.7666015625, "learning_rate": 9.035651368646647e-08, "loss": 89.7577, "step": 117000 }, { "epoch": 0.9679447408694214, "grad_norm": 1044.1488037109375, "learning_rate": 9.008979528546513e-08, "loss": 88.0212, "step": 117010 }, { "epoch": 0.9680274641187906, "grad_norm": 831.52197265625, "learning_rate": 8.982346754348503e-08, "loss": 84.6709, "step": 117020 }, { "epoch": 0.9681101873681598, "grad_norm": 926.8240356445312, "learning_rate": 8.955753048171645e-08, "loss": 73.5235, "step": 117030 }, { "epoch": 0.9681929106175291, "grad_norm": 1101.0938720703125, "learning_rate": 8.929198412131968e-08, "loss": 92.8208, "step": 117040 }, { "epoch": 0.9682756338668983, "grad_norm": 721.4671020507812, "learning_rate": 8.902682848342282e-08, "loss": 74.5471, "step": 117050 }, { "epoch": 0.9683583571162675, "grad_norm": 922.5578002929688, "learning_rate": 8.876206358912232e-08, "loss": 87.3108, "step": 117060 }, { "epoch": 0.9684410803656368, "grad_norm": 815.2578735351562, "learning_rate": 8.849768945948522e-08, "loss": 89.3768, "step": 117070 }, { "epoch": 0.968523803615006, "grad_norm": 905.2554321289062, "learning_rate": 8.823370611554638e-08, "loss": 80.9791, "step": 117080 }, { "epoch": 0.9686065268643752, "grad_norm": 972.8367919921875, "learning_rate": 8.797011357830953e-08, "loss": 137.1645, "step": 117090 }, { "epoch": 0.9686892501137445, "grad_norm": 931.77392578125, "learning_rate": 8.770691186874791e-08, "loss": 84.7926, "step": 117100 }, { "epoch": 0.9687719733631137, "grad_norm": 788.7830200195312, "learning_rate": 8.744410100780254e-08, "loss": 58.1295, "step": 117110 }, { "epoch": 0.9688546966124829, "grad_norm": 2165.523681640625, "learning_rate": 8.718168101638446e-08, "loss": 107.7649, "step": 117120 }, { "epoch": 0.9689374198618522, "grad_norm": 1699.0238037109375, "learning_rate": 8.69196519153731e-08, "loss": 119.7703, "step": 117130 }, { "epoch": 0.9690201431112214, "grad_norm": 1005.6736450195312, "learning_rate": 8.665801372561677e-08, "loss": 91.6887, "step": 117140 }, { "epoch": 0.9691028663605906, "grad_norm": 997.28857421875, "learning_rate": 8.639676646793382e-08, "loss": 78.077, "step": 117150 }, { "epoch": 0.9691855896099599, "grad_norm": 868.7511596679688, "learning_rate": 8.613591016310874e-08, "loss": 78.0869, "step": 117160 }, { "epoch": 0.9692683128593291, "grad_norm": 1064.0950927734375, "learning_rate": 8.58754448318988e-08, "loss": 82.3546, "step": 117170 }, { "epoch": 0.9693510361086983, "grad_norm": 615.5275268554688, "learning_rate": 8.561537049502688e-08, "loss": 108.2288, "step": 117180 }, { "epoch": 0.9694337593580676, "grad_norm": 714.61328125, "learning_rate": 8.535568717318533e-08, "loss": 79.6565, "step": 117190 }, { "epoch": 0.9695164826074368, "grad_norm": 695.4380493164062, "learning_rate": 8.509639488703703e-08, "loss": 81.4569, "step": 117200 }, { "epoch": 0.969599205856806, "grad_norm": 1146.906005859375, "learning_rate": 8.483749365721217e-08, "loss": 87.309, "step": 117210 }, { "epoch": 0.9696819291061753, "grad_norm": 1346.7725830078125, "learning_rate": 8.457898350430982e-08, "loss": 99.9304, "step": 117220 }, { "epoch": 0.9697646523555445, "grad_norm": 692.3303833007812, "learning_rate": 8.432086444889964e-08, "loss": 82.1651, "step": 117230 }, { "epoch": 0.9698473756049137, "grad_norm": 876.026123046875, "learning_rate": 8.406313651151799e-08, "loss": 124.7427, "step": 117240 }, { "epoch": 0.969930098854283, "grad_norm": 654.4212646484375, "learning_rate": 8.380579971267178e-08, "loss": 60.3869, "step": 117250 }, { "epoch": 0.9700128221036523, "grad_norm": 741.6707763671875, "learning_rate": 8.354885407283574e-08, "loss": 86.2612, "step": 117260 }, { "epoch": 0.9700955453530214, "grad_norm": 688.7070922851562, "learning_rate": 8.329229961245355e-08, "loss": 73.4818, "step": 117270 }, { "epoch": 0.9701782686023906, "grad_norm": 632.3734130859375, "learning_rate": 8.303613635193886e-08, "loss": 71.4801, "step": 117280 }, { "epoch": 0.97026099185176, "grad_norm": 1043.305419921875, "learning_rate": 8.278036431167313e-08, "loss": 108.113, "step": 117290 }, { "epoch": 0.9703437151011292, "grad_norm": 680.9979248046875, "learning_rate": 8.252498351200621e-08, "loss": 82.1075, "step": 117300 }, { "epoch": 0.9704264383504984, "grad_norm": 1170.890380859375, "learning_rate": 8.226999397325852e-08, "loss": 78.4652, "step": 117310 }, { "epoch": 0.9705091615998677, "grad_norm": 801.5345458984375, "learning_rate": 8.201539571571826e-08, "loss": 93.103, "step": 117320 }, { "epoch": 0.9705918848492369, "grad_norm": 694.1389770507812, "learning_rate": 8.176118875964201e-08, "loss": 105.483, "step": 117330 }, { "epoch": 0.9706746080986061, "grad_norm": 702.7589111328125, "learning_rate": 8.150737312525692e-08, "loss": 80.5185, "step": 117340 }, { "epoch": 0.9707573313479754, "grad_norm": 735.4678955078125, "learning_rate": 8.125394883275683e-08, "loss": 95.8936, "step": 117350 }, { "epoch": 0.9708400545973446, "grad_norm": 704.5406494140625, "learning_rate": 8.100091590230618e-08, "loss": 103.3761, "step": 117360 }, { "epoch": 0.9709227778467138, "grad_norm": 966.3074340820312, "learning_rate": 8.07482743540372e-08, "loss": 101.2361, "step": 117370 }, { "epoch": 0.9710055010960831, "grad_norm": 1027.426513671875, "learning_rate": 8.049602420805214e-08, "loss": 84.4538, "step": 117380 }, { "epoch": 0.9710882243454523, "grad_norm": 901.7965087890625, "learning_rate": 8.024416548442104e-08, "loss": 86.5814, "step": 117390 }, { "epoch": 0.9711709475948215, "grad_norm": 1091.1844482421875, "learning_rate": 7.99926982031829e-08, "loss": 99.3349, "step": 117400 }, { "epoch": 0.9712536708441908, "grad_norm": 709.89697265625, "learning_rate": 7.974162238434557e-08, "loss": 76.4967, "step": 117410 }, { "epoch": 0.97133639409356, "grad_norm": 859.7228393554688, "learning_rate": 7.949093804788699e-08, "loss": 87.3181, "step": 117420 }, { "epoch": 0.9714191173429292, "grad_norm": 662.0986328125, "learning_rate": 7.924064521375174e-08, "loss": 80.255, "step": 117430 }, { "epoch": 0.9715018405922985, "grad_norm": 804.8336791992188, "learning_rate": 7.899074390185557e-08, "loss": 82.7631, "step": 117440 }, { "epoch": 0.9715845638416677, "grad_norm": 800.1364135742188, "learning_rate": 7.874123413208145e-08, "loss": 74.2819, "step": 117450 }, { "epoch": 0.9716672870910369, "grad_norm": 465.7279052734375, "learning_rate": 7.849211592428186e-08, "loss": 66.4609, "step": 117460 }, { "epoch": 0.9717500103404062, "grad_norm": 444.983154296875, "learning_rate": 7.824338929827813e-08, "loss": 72.5292, "step": 117470 }, { "epoch": 0.9718327335897754, "grad_norm": 253.49969482421875, "learning_rate": 7.799505427386001e-08, "loss": 67.4403, "step": 117480 }, { "epoch": 0.9719154568391446, "grad_norm": 559.1021118164062, "learning_rate": 7.774711087078612e-08, "loss": 71.8914, "step": 117490 }, { "epoch": 0.9719981800885139, "grad_norm": 1051.5477294921875, "learning_rate": 7.749955910878459e-08, "loss": 80.7977, "step": 117500 }, { "epoch": 0.9720809033378831, "grad_norm": 739.0164794921875, "learning_rate": 7.725239900755244e-08, "loss": 62.2263, "step": 117510 }, { "epoch": 0.9721636265872523, "grad_norm": 1051.24560546875, "learning_rate": 7.700563058675448e-08, "loss": 72.5346, "step": 117520 }, { "epoch": 0.9722463498366216, "grad_norm": 876.2381591796875, "learning_rate": 7.67592538660239e-08, "loss": 93.1742, "step": 117530 }, { "epoch": 0.9723290730859908, "grad_norm": 892.1935424804688, "learning_rate": 7.651326886496613e-08, "loss": 75.8199, "step": 117540 }, { "epoch": 0.97241179633536, "grad_norm": 513.9732055664062, "learning_rate": 7.626767560315107e-08, "loss": 75.7576, "step": 117550 }, { "epoch": 0.9724945195847293, "grad_norm": 822.49267578125, "learning_rate": 7.602247410012032e-08, "loss": 82.0519, "step": 117560 }, { "epoch": 0.9725772428340985, "grad_norm": 927.3632202148438, "learning_rate": 7.577766437538325e-08, "loss": 98.2942, "step": 117570 }, { "epoch": 0.9726599660834677, "grad_norm": 772.4122314453125, "learning_rate": 7.553324644841875e-08, "loss": 106.7597, "step": 117580 }, { "epoch": 0.972742689332837, "grad_norm": 796.9185791015625, "learning_rate": 7.528922033867347e-08, "loss": 103.0361, "step": 117590 }, { "epoch": 0.9728254125822062, "grad_norm": 881.1489868164062, "learning_rate": 7.5045586065563e-08, "loss": 84.0167, "step": 117600 }, { "epoch": 0.9729081358315754, "grad_norm": 965.630615234375, "learning_rate": 7.480234364847349e-08, "loss": 93.8757, "step": 117610 }, { "epoch": 0.9729908590809447, "grad_norm": 820.9173583984375, "learning_rate": 7.455949310675725e-08, "loss": 76.266, "step": 117620 }, { "epoch": 0.9730735823303139, "grad_norm": 748.765625, "learning_rate": 7.43170344597377e-08, "loss": 102.896, "step": 117630 }, { "epoch": 0.9731563055796831, "grad_norm": 604.3673095703125, "learning_rate": 7.407496772670609e-08, "loss": 62.3851, "step": 117640 }, { "epoch": 0.9732390288290524, "grad_norm": 783.4528198242188, "learning_rate": 7.383329292692198e-08, "loss": 89.3814, "step": 117650 }, { "epoch": 0.9733217520784216, "grad_norm": 1100.453857421875, "learning_rate": 7.359201007961503e-08, "loss": 75.2225, "step": 117660 }, { "epoch": 0.9734044753277908, "grad_norm": 647.7412109375, "learning_rate": 7.335111920398263e-08, "loss": 84.3431, "step": 117670 }, { "epoch": 0.9734871985771602, "grad_norm": 796.82421875, "learning_rate": 7.311062031919114e-08, "loss": 103.5371, "step": 117680 }, { "epoch": 0.9735699218265293, "grad_norm": 915.2636108398438, "learning_rate": 7.28705134443769e-08, "loss": 82.3625, "step": 117690 }, { "epoch": 0.9736526450758985, "grad_norm": 1407.7706298828125, "learning_rate": 7.263079859864298e-08, "loss": 99.5093, "step": 117700 }, { "epoch": 0.9737353683252679, "grad_norm": 515.5316162109375, "learning_rate": 7.239147580106242e-08, "loss": 85.3771, "step": 117710 }, { "epoch": 0.9738180915746371, "grad_norm": 635.7969970703125, "learning_rate": 7.215254507067782e-08, "loss": 83.8497, "step": 117720 }, { "epoch": 0.9739008148240063, "grad_norm": 915.8248901367188, "learning_rate": 7.191400642649893e-08, "loss": 72.568, "step": 117730 }, { "epoch": 0.9739835380733756, "grad_norm": 774.489013671875, "learning_rate": 7.167585988750669e-08, "loss": 101.6645, "step": 117740 }, { "epoch": 0.9740662613227448, "grad_norm": 866.99560546875, "learning_rate": 7.143810547264762e-08, "loss": 61.4445, "step": 117750 }, { "epoch": 0.974148984572114, "grad_norm": 642.11474609375, "learning_rate": 7.120074320083991e-08, "loss": 64.4058, "step": 117760 }, { "epoch": 0.9742317078214833, "grad_norm": 1115.784423828125, "learning_rate": 7.096377309096846e-08, "loss": 76.0183, "step": 117770 }, { "epoch": 0.9743144310708525, "grad_norm": 734.0995483398438, "learning_rate": 7.072719516188875e-08, "loss": 96.2746, "step": 117780 }, { "epoch": 0.9743971543202217, "grad_norm": 525.3031616210938, "learning_rate": 7.049100943242404e-08, "loss": 76.8062, "step": 117790 }, { "epoch": 0.974479877569591, "grad_norm": 831.6923217773438, "learning_rate": 7.025521592136597e-08, "loss": 73.2237, "step": 117800 }, { "epoch": 0.9745626008189602, "grad_norm": 1876.879150390625, "learning_rate": 7.001981464747565e-08, "loss": 87.2098, "step": 117810 }, { "epoch": 0.9746453240683294, "grad_norm": 811.3916015625, "learning_rate": 6.978480562948309e-08, "loss": 94.3064, "step": 117820 }, { "epoch": 0.9747280473176987, "grad_norm": 1149.4193115234375, "learning_rate": 6.955018888608722e-08, "loss": 100.6052, "step": 117830 }, { "epoch": 0.9748107705670679, "grad_norm": 863.6624145507812, "learning_rate": 6.931596443595478e-08, "loss": 109.152, "step": 117840 }, { "epoch": 0.9748934938164371, "grad_norm": 771.5863647460938, "learning_rate": 6.908213229772254e-08, "loss": 67.1083, "step": 117850 }, { "epoch": 0.9749762170658064, "grad_norm": 581.7706298828125, "learning_rate": 6.884869248999504e-08, "loss": 92.5001, "step": 117860 }, { "epoch": 0.9750589403151756, "grad_norm": 790.7437133789062, "learning_rate": 6.861564503134688e-08, "loss": 98.9197, "step": 117870 }, { "epoch": 0.9751416635645448, "grad_norm": 710.2706909179688, "learning_rate": 6.838298994031933e-08, "loss": 100.5049, "step": 117880 }, { "epoch": 0.9752243868139141, "grad_norm": 656.0684814453125, "learning_rate": 6.815072723542426e-08, "loss": 85.3911, "step": 117890 }, { "epoch": 0.9753071100632833, "grad_norm": 482.3664245605469, "learning_rate": 6.791885693514134e-08, "loss": 58.3653, "step": 117900 }, { "epoch": 0.9753898333126525, "grad_norm": 1170.74169921875, "learning_rate": 6.768737905792022e-08, "loss": 84.8319, "step": 117910 }, { "epoch": 0.9754725565620218, "grad_norm": 755.68017578125, "learning_rate": 6.745629362217731e-08, "loss": 113.3002, "step": 117920 }, { "epoch": 0.975555279811391, "grad_norm": 776.3069458007812, "learning_rate": 6.722560064630013e-08, "loss": 65.8851, "step": 117930 }, { "epoch": 0.9756380030607602, "grad_norm": 1081.909912109375, "learning_rate": 6.699530014864397e-08, "loss": 92.803, "step": 117940 }, { "epoch": 0.9757207263101295, "grad_norm": 552.3186645507812, "learning_rate": 6.676539214753253e-08, "loss": 92.8013, "step": 117950 }, { "epoch": 0.9758034495594987, "grad_norm": 771.9279174804688, "learning_rate": 6.653587666125782e-08, "loss": 74.0391, "step": 117960 }, { "epoch": 0.9758861728088679, "grad_norm": 670.3401489257812, "learning_rate": 6.630675370808193e-08, "loss": 77.2455, "step": 117970 }, { "epoch": 0.9759688960582372, "grad_norm": 1167.330810546875, "learning_rate": 6.607802330623525e-08, "loss": 87.141, "step": 117980 }, { "epoch": 0.9760516193076064, "grad_norm": 347.4068908691406, "learning_rate": 6.584968547391657e-08, "loss": 72.6703, "step": 117990 }, { "epoch": 0.9761343425569756, "grad_norm": 1017.5067749023438, "learning_rate": 6.562174022929358e-08, "loss": 84.4651, "step": 118000 }, { "epoch": 0.9762170658063448, "grad_norm": 818.614013671875, "learning_rate": 6.539418759050286e-08, "loss": 78.0972, "step": 118010 }, { "epoch": 0.9762997890557141, "grad_norm": 870.0516357421875, "learning_rate": 6.516702757564941e-08, "loss": 60.4278, "step": 118020 }, { "epoch": 0.9763825123050833, "grad_norm": 1681.1678466796875, "learning_rate": 6.494026020280875e-08, "loss": 94.2777, "step": 118030 }, { "epoch": 0.9764652355544525, "grad_norm": 767.0552978515625, "learning_rate": 6.471388549002255e-08, "loss": 96.2769, "step": 118040 }, { "epoch": 0.9765479588038218, "grad_norm": 904.4722900390625, "learning_rate": 6.448790345530253e-08, "loss": 73.3652, "step": 118050 }, { "epoch": 0.976630682053191, "grad_norm": 916.374267578125, "learning_rate": 6.426231411662876e-08, "loss": 75.4504, "step": 118060 }, { "epoch": 0.9767134053025602, "grad_norm": 327.85504150390625, "learning_rate": 6.403711749195073e-08, "loss": 55.0961, "step": 118070 }, { "epoch": 0.9767961285519295, "grad_norm": 794.025146484375, "learning_rate": 6.381231359918638e-08, "loss": 82.1348, "step": 118080 }, { "epoch": 0.9768788518012987, "grad_norm": 971.2992553710938, "learning_rate": 6.358790245622193e-08, "loss": 65.286, "step": 118090 }, { "epoch": 0.9769615750506679, "grad_norm": 382.24456787109375, "learning_rate": 6.336388408091366e-08, "loss": 81.6362, "step": 118100 }, { "epoch": 0.9770442983000373, "grad_norm": 700.8916625976562, "learning_rate": 6.314025849108397e-08, "loss": 66.6585, "step": 118110 }, { "epoch": 0.9771270215494064, "grad_norm": 598.8179321289062, "learning_rate": 6.291702570452806e-08, "loss": 87.4747, "step": 118120 }, { "epoch": 0.9772097447987756, "grad_norm": 832.1001586914062, "learning_rate": 6.269418573900565e-08, "loss": 78.9643, "step": 118130 }, { "epoch": 0.977292468048145, "grad_norm": 1039.1939697265625, "learning_rate": 6.247173861224753e-08, "loss": 125.8844, "step": 118140 }, { "epoch": 0.9773751912975142, "grad_norm": 537.9774169921875, "learning_rate": 6.224968434195289e-08, "loss": 60.1959, "step": 118150 }, { "epoch": 0.9774579145468834, "grad_norm": 1239.988037109375, "learning_rate": 6.202802294578981e-08, "loss": 96.3588, "step": 118160 }, { "epoch": 0.9775406377962527, "grad_norm": 629.1329345703125, "learning_rate": 6.180675444139527e-08, "loss": 78.8181, "step": 118170 }, { "epoch": 0.9776233610456219, "grad_norm": 1456.6697998046875, "learning_rate": 6.158587884637357e-08, "loss": 83.5854, "step": 118180 }, { "epoch": 0.9777060842949911, "grad_norm": 694.644775390625, "learning_rate": 6.136539617829895e-08, "loss": 75.1875, "step": 118190 }, { "epoch": 0.9777888075443604, "grad_norm": 1134.1533203125, "learning_rate": 6.114530645471461e-08, "loss": 95.2716, "step": 118200 }, { "epoch": 0.9778715307937296, "grad_norm": 819.7823486328125, "learning_rate": 6.09256096931321e-08, "loss": 93.0044, "step": 118210 }, { "epoch": 0.9779542540430988, "grad_norm": 874.4256591796875, "learning_rate": 6.070630591103188e-08, "loss": 77.5878, "step": 118220 }, { "epoch": 0.9780369772924681, "grad_norm": 941.7909545898438, "learning_rate": 6.048739512586221e-08, "loss": 106.6546, "step": 118230 }, { "epoch": 0.9781197005418373, "grad_norm": 496.1390075683594, "learning_rate": 6.026887735504083e-08, "loss": 70.8411, "step": 118240 }, { "epoch": 0.9782024237912065, "grad_norm": 1133.30517578125, "learning_rate": 6.005075261595495e-08, "loss": 62.7111, "step": 118250 }, { "epoch": 0.9782851470405758, "grad_norm": 1455.4931640625, "learning_rate": 5.983302092595955e-08, "loss": 82.0333, "step": 118260 }, { "epoch": 0.978367870289945, "grad_norm": 369.7554626464844, "learning_rate": 5.961568230237858e-08, "loss": 109.3388, "step": 118270 }, { "epoch": 0.9784505935393142, "grad_norm": 983.8111572265625, "learning_rate": 5.939873676250374e-08, "loss": 74.7944, "step": 118280 }, { "epoch": 0.9785333167886835, "grad_norm": 437.4408874511719, "learning_rate": 5.91821843235979e-08, "loss": 82.0863, "step": 118290 }, { "epoch": 0.9786160400380527, "grad_norm": 1494.396484375, "learning_rate": 5.8966025002889505e-08, "loss": 103.6332, "step": 118300 }, { "epoch": 0.9786987632874219, "grad_norm": 637.7553100585938, "learning_rate": 5.8750258817578676e-08, "loss": 59.2877, "step": 118310 }, { "epoch": 0.9787814865367912, "grad_norm": 802.739990234375, "learning_rate": 5.85348857848328e-08, "loss": 55.1885, "step": 118320 }, { "epoch": 0.9788642097861604, "grad_norm": 941.2757568359375, "learning_rate": 5.8319905921787603e-08, "loss": 69.7701, "step": 118330 }, { "epoch": 0.9789469330355296, "grad_norm": 1327.10888671875, "learning_rate": 5.810531924554774e-08, "loss": 87.9876, "step": 118340 }, { "epoch": 0.9790296562848989, "grad_norm": 1318.0863037109375, "learning_rate": 5.7891125773187896e-08, "loss": 139.0294, "step": 118350 }, { "epoch": 0.9791123795342681, "grad_norm": 1360.8392333984375, "learning_rate": 5.7677325521749983e-08, "loss": 68.9617, "step": 118360 }, { "epoch": 0.9791951027836373, "grad_norm": 1109.0506591796875, "learning_rate": 5.746391850824484e-08, "loss": 122.1048, "step": 118370 }, { "epoch": 0.9792778260330066, "grad_norm": 1080.967041015625, "learning_rate": 5.725090474965278e-08, "loss": 67.3166, "step": 118380 }, { "epoch": 0.9793605492823758, "grad_norm": 738.1102905273438, "learning_rate": 5.703828426292191e-08, "loss": 103.1861, "step": 118390 }, { "epoch": 0.979443272531745, "grad_norm": 1060.9788818359375, "learning_rate": 5.6826057064969244e-08, "loss": 83.6084, "step": 118400 }, { "epoch": 0.9795259957811143, "grad_norm": 731.0538330078125, "learning_rate": 5.6614223172681836e-08, "loss": 100.0388, "step": 118410 }, { "epoch": 0.9796087190304835, "grad_norm": 1262.67578125, "learning_rate": 5.640278260291287e-08, "loss": 83.6562, "step": 118420 }, { "epoch": 0.9796914422798527, "grad_norm": 726.7060546875, "learning_rate": 5.6191735372487235e-08, "loss": 89.0712, "step": 118430 }, { "epoch": 0.979774165529222, "grad_norm": 708.3402099609375, "learning_rate": 5.5981081498195365e-08, "loss": 108.3079, "step": 118440 }, { "epoch": 0.9798568887785912, "grad_norm": 731.2184448242188, "learning_rate": 5.577082099679942e-08, "loss": 64.9039, "step": 118450 }, { "epoch": 0.9799396120279604, "grad_norm": 673.4403076171875, "learning_rate": 5.556095388502824e-08, "loss": 95.9862, "step": 118460 }, { "epoch": 0.9800223352773297, "grad_norm": 1423.2120361328125, "learning_rate": 5.535148017958014e-08, "loss": 81.7384, "step": 118470 }, { "epoch": 0.9801050585266989, "grad_norm": 706.9336547851562, "learning_rate": 5.514239989712178e-08, "loss": 82.0284, "step": 118480 }, { "epoch": 0.9801877817760681, "grad_norm": 711.587646484375, "learning_rate": 5.493371305428874e-08, "loss": 101.2837, "step": 118490 }, { "epoch": 0.9802705050254374, "grad_norm": 993.87353515625, "learning_rate": 5.472541966768552e-08, "loss": 74.9229, "step": 118500 }, { "epoch": 0.9803532282748066, "grad_norm": 746.7269287109375, "learning_rate": 5.451751975388442e-08, "loss": 77.2581, "step": 118510 }, { "epoch": 0.9804359515241758, "grad_norm": 1050.691162109375, "learning_rate": 5.4310013329428314e-08, "loss": 88.0851, "step": 118520 }, { "epoch": 0.9805186747735452, "grad_norm": 496.2442321777344, "learning_rate": 5.410290041082622e-08, "loss": 141.8692, "step": 118530 }, { "epoch": 0.9806013980229144, "grad_norm": 484.4472351074219, "learning_rate": 5.3896181014557733e-08, "loss": 62.8719, "step": 118540 }, { "epoch": 0.9806841212722835, "grad_norm": 1243.587646484375, "learning_rate": 5.368985515707137e-08, "loss": 120.0687, "step": 118550 }, { "epoch": 0.9807668445216529, "grad_norm": 533.1880493164062, "learning_rate": 5.348392285478232e-08, "loss": 66.4509, "step": 118560 }, { "epoch": 0.9808495677710221, "grad_norm": 1625.4766845703125, "learning_rate": 5.327838412407582e-08, "loss": 110.9632, "step": 118570 }, { "epoch": 0.9809322910203913, "grad_norm": 521.4833374023438, "learning_rate": 5.3073238981305455e-08, "loss": 88.1412, "step": 118580 }, { "epoch": 0.9810150142697606, "grad_norm": 671.5929565429688, "learning_rate": 5.2868487442794825e-08, "loss": 59.3454, "step": 118590 }, { "epoch": 0.9810977375191298, "grad_norm": 717.1107177734375, "learning_rate": 5.266412952483424e-08, "loss": 101.9609, "step": 118600 }, { "epoch": 0.981180460768499, "grad_norm": 657.0799560546875, "learning_rate": 5.246016524368347e-08, "loss": 88.4751, "step": 118610 }, { "epoch": 0.9812631840178683, "grad_norm": 616.4298706054688, "learning_rate": 5.225659461557176e-08, "loss": 111.9666, "step": 118620 }, { "epoch": 0.9813459072672375, "grad_norm": 525.951416015625, "learning_rate": 5.205341765669503e-08, "loss": 113.7892, "step": 118630 }, { "epoch": 0.9814286305166067, "grad_norm": 701.6843872070312, "learning_rate": 5.185063438322091e-08, "loss": 59.8481, "step": 118640 }, { "epoch": 0.981511353765976, "grad_norm": 768.6402587890625, "learning_rate": 5.1648244811282054e-08, "loss": 75.6018, "step": 118650 }, { "epoch": 0.9815940770153452, "grad_norm": 1144.209716796875, "learning_rate": 5.14462489569828e-08, "loss": 80.7018, "step": 118660 }, { "epoch": 0.9816768002647144, "grad_norm": 1187.2999267578125, "learning_rate": 5.1244646836394187e-08, "loss": 93.3386, "step": 118670 }, { "epoch": 0.9817595235140837, "grad_norm": 1293.2803955078125, "learning_rate": 5.104343846555726e-08, "loss": 73.9802, "step": 118680 }, { "epoch": 0.9818422467634529, "grad_norm": 918.84765625, "learning_rate": 5.0842623860482e-08, "loss": 94.4681, "step": 118690 }, { "epoch": 0.9819249700128221, "grad_norm": 558.7251586914062, "learning_rate": 5.064220303714507e-08, "loss": 82.5352, "step": 118700 }, { "epoch": 0.9820076932621913, "grad_norm": 1152.9447021484375, "learning_rate": 5.044217601149371e-08, "loss": 65.6448, "step": 118710 }, { "epoch": 0.9820904165115606, "grad_norm": 535.541748046875, "learning_rate": 5.024254279944296e-08, "loss": 72.6428, "step": 118720 }, { "epoch": 0.9821731397609298, "grad_norm": 2769.91845703125, "learning_rate": 5.004330341687735e-08, "loss": 102.4574, "step": 118730 }, { "epoch": 0.982255863010299, "grad_norm": 680.4102172851562, "learning_rate": 4.9844457879648086e-08, "loss": 94.1751, "step": 118740 }, { "epoch": 0.9823385862596683, "grad_norm": 941.7730712890625, "learning_rate": 4.9646006203577515e-08, "loss": 112.8078, "step": 118750 }, { "epoch": 0.9824213095090375, "grad_norm": 1552.2708740234375, "learning_rate": 4.944794840445521e-08, "loss": 78.17, "step": 118760 }, { "epoch": 0.9825040327584067, "grad_norm": 1019.7816772460938, "learning_rate": 4.9250284498039146e-08, "loss": 65.3781, "step": 118770 }, { "epoch": 0.982586756007776, "grad_norm": 658.9552001953125, "learning_rate": 4.905301450005784e-08, "loss": 74.2217, "step": 118780 }, { "epoch": 0.9826694792571452, "grad_norm": 731.14111328125, "learning_rate": 4.885613842620596e-08, "loss": 93.1678, "step": 118790 }, { "epoch": 0.9827522025065144, "grad_norm": 1084.143310546875, "learning_rate": 4.865965629214819e-08, "loss": 86.729, "step": 118800 }, { "epoch": 0.9828349257558837, "grad_norm": 621.4204711914062, "learning_rate": 4.846356811351871e-08, "loss": 87.1071, "step": 118810 }, { "epoch": 0.9829176490052529, "grad_norm": 686.3203735351562, "learning_rate": 4.826787390591836e-08, "loss": 66.5393, "step": 118820 }, { "epoch": 0.9830003722546221, "grad_norm": 380.79351806640625, "learning_rate": 4.8072573684918024e-08, "loss": 60.1358, "step": 118830 }, { "epoch": 0.9830830955039914, "grad_norm": 1424.990966796875, "learning_rate": 4.787766746605638e-08, "loss": 97.6002, "step": 118840 }, { "epoch": 0.9831658187533606, "grad_norm": 489.2065734863281, "learning_rate": 4.768315526484158e-08, "loss": 69.9402, "step": 118850 }, { "epoch": 0.9832485420027298, "grad_norm": 868.9462280273438, "learning_rate": 4.7489037096750126e-08, "loss": 106.7428, "step": 118860 }, { "epoch": 0.9833312652520991, "grad_norm": 2591.3154296875, "learning_rate": 4.7295312977226895e-08, "loss": 102.9899, "step": 118870 }, { "epoch": 0.9834139885014683, "grad_norm": 1134.68212890625, "learning_rate": 4.710198292168566e-08, "loss": 78.4484, "step": 118880 }, { "epoch": 0.9834967117508375, "grad_norm": 805.161376953125, "learning_rate": 4.690904694550913e-08, "loss": 88.5338, "step": 118890 }, { "epoch": 0.9835794350002068, "grad_norm": 747.1693115234375, "learning_rate": 4.671650506404835e-08, "loss": 83.3935, "step": 118900 }, { "epoch": 0.983662158249576, "grad_norm": 807.8037109375, "learning_rate": 4.6524357292622724e-08, "loss": 78.79, "step": 118910 }, { "epoch": 0.9837448814989452, "grad_norm": 695.4989013671875, "learning_rate": 4.633260364652059e-08, "loss": 82.1724, "step": 118920 }, { "epoch": 0.9838276047483145, "grad_norm": 840.2431640625, "learning_rate": 4.6141244140998634e-08, "loss": 73.6012, "step": 118930 }, { "epoch": 0.9839103279976837, "grad_norm": 857.3699340820312, "learning_rate": 4.5950278791283e-08, "loss": 75.8136, "step": 118940 }, { "epoch": 0.9839930512470529, "grad_norm": 1018.6058959960938, "learning_rate": 4.575970761256765e-08, "loss": 85.1594, "step": 118950 }, { "epoch": 0.9840757744964223, "grad_norm": 1265.7125244140625, "learning_rate": 4.556953062001546e-08, "loss": 89.4971, "step": 118960 }, { "epoch": 0.9841584977457914, "grad_norm": 735.5936279296875, "learning_rate": 4.5379747828757095e-08, "loss": 81.9222, "step": 118970 }, { "epoch": 0.9842412209951606, "grad_norm": 881.5132446289062, "learning_rate": 4.5190359253894925e-08, "loss": 88.397, "step": 118980 }, { "epoch": 0.98432394424453, "grad_norm": 353.5128479003906, "learning_rate": 4.500136491049578e-08, "loss": 83.2312, "step": 118990 }, { "epoch": 0.9844066674938992, "grad_norm": 1424.359130859375, "learning_rate": 4.481276481359764e-08, "loss": 86.6353, "step": 119000 }, { "epoch": 0.9844893907432684, "grad_norm": 887.9814453125, "learning_rate": 4.462455897820628e-08, "loss": 76.8451, "step": 119010 }, { "epoch": 0.9845721139926377, "grad_norm": 806.23193359375, "learning_rate": 4.443674741929693e-08, "loss": 66.0883, "step": 119020 }, { "epoch": 0.9846548372420069, "grad_norm": 573.1680297851562, "learning_rate": 4.424933015181265e-08, "loss": 72.9488, "step": 119030 }, { "epoch": 0.9847375604913761, "grad_norm": 976.209716796875, "learning_rate": 4.40623071906654e-08, "loss": 84.5281, "step": 119040 }, { "epoch": 0.9848202837407454, "grad_norm": 947.809814453125, "learning_rate": 4.387567855073604e-08, "loss": 89.569, "step": 119050 }, { "epoch": 0.9849030069901146, "grad_norm": 1052.1142578125, "learning_rate": 4.368944424687271e-08, "loss": 69.5182, "step": 119060 }, { "epoch": 0.9849857302394838, "grad_norm": 498.6680603027344, "learning_rate": 4.350360429389411e-08, "loss": 92.5021, "step": 119070 }, { "epoch": 0.9850684534888531, "grad_norm": 878.2149047851562, "learning_rate": 4.3318158706586734e-08, "loss": 60.1033, "step": 119080 }, { "epoch": 0.9851511767382223, "grad_norm": 618.4829711914062, "learning_rate": 4.3133107499704894e-08, "loss": 80.3329, "step": 119090 }, { "epoch": 0.9852338999875915, "grad_norm": 544.2587280273438, "learning_rate": 4.294845068797349e-08, "loss": 79.2608, "step": 119100 }, { "epoch": 0.9853166232369608, "grad_norm": 989.1095581054688, "learning_rate": 4.276418828608353e-08, "loss": 69.4792, "step": 119110 }, { "epoch": 0.98539934648633, "grad_norm": 1151.3955078125, "learning_rate": 4.258032030869608e-08, "loss": 83.3888, "step": 119120 }, { "epoch": 0.9854820697356992, "grad_norm": 628.578857421875, "learning_rate": 4.2396846770441644e-08, "loss": 103.2333, "step": 119130 }, { "epoch": 0.9855647929850685, "grad_norm": 830.7070922851562, "learning_rate": 4.221376768591801e-08, "loss": 83.7114, "step": 119140 }, { "epoch": 0.9856475162344377, "grad_norm": 343.4464111328125, "learning_rate": 4.203108306969128e-08, "loss": 86.0502, "step": 119150 }, { "epoch": 0.9857302394838069, "grad_norm": 621.4478759765625, "learning_rate": 4.1848792936297064e-08, "loss": 83.2397, "step": 119160 }, { "epoch": 0.9858129627331762, "grad_norm": 964.1356811523438, "learning_rate": 4.166689730023987e-08, "loss": 77.1152, "step": 119170 }, { "epoch": 0.9858956859825454, "grad_norm": 818.7540893554688, "learning_rate": 4.148539617599201e-08, "loss": 92.1264, "step": 119180 }, { "epoch": 0.9859784092319146, "grad_norm": 1049.2435302734375, "learning_rate": 4.13042895779947e-08, "loss": 92.8093, "step": 119190 }, { "epoch": 0.9860611324812839, "grad_norm": 622.9645385742188, "learning_rate": 4.112357752065754e-08, "loss": 73.6431, "step": 119200 }, { "epoch": 0.9861438557306531, "grad_norm": 934.980224609375, "learning_rate": 4.0943260018359024e-08, "loss": 102.3326, "step": 119210 }, { "epoch": 0.9862265789800223, "grad_norm": 775.8395385742188, "learning_rate": 4.076333708544655e-08, "loss": 99.6095, "step": 119220 }, { "epoch": 0.9863093022293916, "grad_norm": 561.11181640625, "learning_rate": 4.058380873623591e-08, "loss": 60.4265, "step": 119230 }, { "epoch": 0.9863920254787608, "grad_norm": 1175.651123046875, "learning_rate": 4.040467498501011e-08, "loss": 102.5078, "step": 119240 }, { "epoch": 0.98647474872813, "grad_norm": 1031.730712890625, "learning_rate": 4.02259358460233e-08, "loss": 107.7175, "step": 119250 }, { "epoch": 0.9865574719774993, "grad_norm": 1173.889404296875, "learning_rate": 4.00475913334969e-08, "loss": 112.8835, "step": 119260 }, { "epoch": 0.9866401952268685, "grad_norm": 747.98486328125, "learning_rate": 3.986964146162009e-08, "loss": 72.1821, "step": 119270 }, { "epoch": 0.9867229184762377, "grad_norm": 904.0946044921875, "learning_rate": 3.969208624455212e-08, "loss": 78.2023, "step": 119280 }, { "epoch": 0.986805641725607, "grad_norm": 761.3492431640625, "learning_rate": 3.951492569642001e-08, "loss": 83.0522, "step": 119290 }, { "epoch": 0.9868883649749762, "grad_norm": 1574.1885986328125, "learning_rate": 3.9338159831319724e-08, "loss": 108.7734, "step": 119300 }, { "epoch": 0.9869710882243454, "grad_norm": 774.9620971679688, "learning_rate": 3.9161788663315546e-08, "loss": 79.6705, "step": 119310 }, { "epoch": 0.9870538114737147, "grad_norm": 1015.9965209960938, "learning_rate": 3.898581220644071e-08, "loss": 126.5208, "step": 119320 }, { "epoch": 0.9871365347230839, "grad_norm": 1341.8759765625, "learning_rate": 3.881023047469679e-08, "loss": 90.6255, "step": 119330 }, { "epoch": 0.9872192579724531, "grad_norm": 511.98223876953125, "learning_rate": 3.863504348205427e-08, "loss": 85.9824, "step": 119340 }, { "epoch": 0.9873019812218224, "grad_norm": 1165.170654296875, "learning_rate": 3.8460251242451454e-08, "loss": 123.9411, "step": 119350 }, { "epoch": 0.9873847044711916, "grad_norm": 1152.4227294921875, "learning_rate": 3.828585376979666e-08, "loss": 91.7785, "step": 119360 }, { "epoch": 0.9874674277205608, "grad_norm": 684.0379638671875, "learning_rate": 3.811185107796489e-08, "loss": 69.7867, "step": 119370 }, { "epoch": 0.9875501509699302, "grad_norm": 664.3668823242188, "learning_rate": 3.793824318080064e-08, "loss": 74.003, "step": 119380 }, { "epoch": 0.9876328742192994, "grad_norm": 800.0819091796875, "learning_rate": 3.7765030092118404e-08, "loss": 138.9721, "step": 119390 }, { "epoch": 0.9877155974686685, "grad_norm": 845.6797485351562, "learning_rate": 3.7592211825698835e-08, "loss": 104.952, "step": 119400 }, { "epoch": 0.9877983207180379, "grad_norm": 942.4180297851562, "learning_rate": 3.741978839529259e-08, "loss": 112.8675, "step": 119410 }, { "epoch": 0.9878810439674071, "grad_norm": 1062.473876953125, "learning_rate": 3.72477598146187e-08, "loss": 90.5084, "step": 119420 }, { "epoch": 0.9879637672167763, "grad_norm": 636.2952270507812, "learning_rate": 3.7076126097363997e-08, "loss": 70.5935, "step": 119430 }, { "epoch": 0.9880464904661455, "grad_norm": 446.5930480957031, "learning_rate": 3.690488725718588e-08, "loss": 109.0157, "step": 119440 }, { "epoch": 0.9881292137155148, "grad_norm": 1208.2958984375, "learning_rate": 3.673404330770847e-08, "loss": 64.4161, "step": 119450 }, { "epoch": 0.988211936964884, "grad_norm": 501.6045227050781, "learning_rate": 3.65635942625242e-08, "loss": 64.2057, "step": 119460 }, { "epoch": 0.9882946602142532, "grad_norm": 1004.0560913085938, "learning_rate": 3.639354013519614e-08, "loss": 118.3625, "step": 119470 }, { "epoch": 0.9883773834636225, "grad_norm": 1431.545166015625, "learning_rate": 3.6223880939254e-08, "loss": 111.376, "step": 119480 }, { "epoch": 0.9884601067129917, "grad_norm": 717.1210327148438, "learning_rate": 3.605461668819754e-08, "loss": 81.6542, "step": 119490 }, { "epoch": 0.9885428299623609, "grad_norm": 844.7835083007812, "learning_rate": 3.588574739549322e-08, "loss": 92.5829, "step": 119500 }, { "epoch": 0.9886255532117302, "grad_norm": 910.1040649414062, "learning_rate": 3.571727307457806e-08, "loss": 111.628, "step": 119510 }, { "epoch": 0.9887082764610994, "grad_norm": 649.8385620117188, "learning_rate": 3.5549193738856346e-08, "loss": 86.6366, "step": 119520 }, { "epoch": 0.9887909997104686, "grad_norm": 784.6031494140625, "learning_rate": 3.5381509401701264e-08, "loss": 72.6148, "step": 119530 }, { "epoch": 0.9888737229598379, "grad_norm": 1095.903564453125, "learning_rate": 3.5214220076455474e-08, "loss": 120.5914, "step": 119540 }, { "epoch": 0.9889564462092071, "grad_norm": 954.7218017578125, "learning_rate": 3.5047325776428884e-08, "loss": 101.9791, "step": 119550 }, { "epoch": 0.9890391694585763, "grad_norm": 1238.466796875, "learning_rate": 3.488082651490032e-08, "loss": 100.0169, "step": 119560 }, { "epoch": 0.9891218927079456, "grad_norm": 688.223388671875, "learning_rate": 3.471472230511752e-08, "loss": 96.7697, "step": 119570 }, { "epoch": 0.9892046159573148, "grad_norm": 788.05859375, "learning_rate": 3.454901316029657e-08, "loss": 100.5506, "step": 119580 }, { "epoch": 0.989287339206684, "grad_norm": 1479.240234375, "learning_rate": 3.438369909362249e-08, "loss": 109.9174, "step": 119590 }, { "epoch": 0.9893700624560533, "grad_norm": 831.3067016601562, "learning_rate": 3.421878011824864e-08, "loss": 110.4286, "step": 119600 }, { "epoch": 0.9894527857054225, "grad_norm": 924.3592529296875, "learning_rate": 3.405425624729619e-08, "loss": 91.2705, "step": 119610 }, { "epoch": 0.9895355089547917, "grad_norm": 674.0458374023438, "learning_rate": 3.389012749385578e-08, "loss": 74.0681, "step": 119620 }, { "epoch": 0.989618232204161, "grad_norm": 523.977294921875, "learning_rate": 3.3726393870986976e-08, "loss": 97.7908, "step": 119630 }, { "epoch": 0.9897009554535302, "grad_norm": 619.9107666015625, "learning_rate": 3.356305539171656e-08, "loss": 99.3192, "step": 119640 }, { "epoch": 0.9897836787028994, "grad_norm": 644.1179809570312, "learning_rate": 3.340011206904137e-08, "loss": 80.9895, "step": 119650 }, { "epoch": 0.9898664019522687, "grad_norm": 646.9247436523438, "learning_rate": 3.323756391592548e-08, "loss": 75.6963, "step": 119660 }, { "epoch": 0.9899491252016379, "grad_norm": 613.2579956054688, "learning_rate": 3.307541094530242e-08, "loss": 106.8036, "step": 119670 }, { "epoch": 0.9900318484510071, "grad_norm": 903.300048828125, "learning_rate": 3.291365317007355e-08, "loss": 66.5273, "step": 119680 }, { "epoch": 0.9901145717003764, "grad_norm": 749.8668212890625, "learning_rate": 3.2752290603109694e-08, "loss": 81.0457, "step": 119690 }, { "epoch": 0.9901972949497456, "grad_norm": 855.69677734375, "learning_rate": 3.25913232572489e-08, "loss": 103.7646, "step": 119700 }, { "epoch": 0.9902800181991148, "grad_norm": 640.4257202148438, "learning_rate": 3.243075114529981e-08, "loss": 88.8553, "step": 119710 }, { "epoch": 0.9903627414484841, "grad_norm": 642.8728637695312, "learning_rate": 3.2270574280037213e-08, "loss": 89.717, "step": 119720 }, { "epoch": 0.9904454646978533, "grad_norm": 1653.7088623046875, "learning_rate": 3.211079267420647e-08, "loss": 106.5534, "step": 119730 }, { "epoch": 0.9905281879472225, "grad_norm": 1032.344970703125, "learning_rate": 3.195140634052074e-08, "loss": 83.4833, "step": 119740 }, { "epoch": 0.9906109111965918, "grad_norm": 886.9328002929688, "learning_rate": 3.179241529166099e-08, "loss": 82.0412, "step": 119750 }, { "epoch": 0.990693634445961, "grad_norm": 930.1018676757812, "learning_rate": 3.163381954027822e-08, "loss": 77.0731, "step": 119760 }, { "epoch": 0.9907763576953302, "grad_norm": 1377.98388671875, "learning_rate": 3.147561909899066e-08, "loss": 89.5693, "step": 119770 }, { "epoch": 0.9908590809446995, "grad_norm": 943.3343505859375, "learning_rate": 3.131781398038547e-08, "loss": 82.712, "step": 119780 }, { "epoch": 0.9909418041940687, "grad_norm": 901.633056640625, "learning_rate": 3.1160404197018155e-08, "loss": 70.4035, "step": 119790 }, { "epoch": 0.9910245274434379, "grad_norm": 1146.6190185546875, "learning_rate": 3.100338976141426e-08, "loss": 68.581, "step": 119800 }, { "epoch": 0.9911072506928073, "grad_norm": 1056.26904296875, "learning_rate": 3.084677068606545e-08, "loss": 76.8607, "step": 119810 }, { "epoch": 0.9911899739421764, "grad_norm": 531.5973510742188, "learning_rate": 3.0690546983433986e-08, "loss": 101.3226, "step": 119820 }, { "epoch": 0.9912726971915456, "grad_norm": 1205.59521484375, "learning_rate": 3.053471866594993e-08, "loss": 120.3366, "step": 119830 }, { "epoch": 0.991355420440915, "grad_norm": 904.2116088867188, "learning_rate": 3.0379285746011125e-08, "loss": 75.5825, "step": 119840 }, { "epoch": 0.9914381436902842, "grad_norm": 891.4710083007812, "learning_rate": 3.022424823598546e-08, "loss": 89.1019, "step": 119850 }, { "epoch": 0.9915208669396534, "grad_norm": 869.971923828125, "learning_rate": 3.0069606148208085e-08, "loss": 88.6072, "step": 119860 }, { "epoch": 0.9916035901890227, "grad_norm": 634.763916015625, "learning_rate": 2.991535949498303e-08, "loss": 87.1923, "step": 119870 }, { "epoch": 0.9916863134383919, "grad_norm": 789.7999877929688, "learning_rate": 2.9761508288583262e-08, "loss": 72.2758, "step": 119880 }, { "epoch": 0.9917690366877611, "grad_norm": 535.9653930664062, "learning_rate": 2.96080525412501e-08, "loss": 92.1307, "step": 119890 }, { "epoch": 0.9918517599371304, "grad_norm": 1039.1324462890625, "learning_rate": 2.9454992265193216e-08, "loss": 54.078, "step": 119900 }, { "epoch": 0.9919344831864996, "grad_norm": 616.3204345703125, "learning_rate": 2.9302327472590653e-08, "loss": 85.4111, "step": 119910 }, { "epoch": 0.9920172064358688, "grad_norm": 799.8880615234375, "learning_rate": 2.9150058175589356e-08, "loss": 82.8982, "step": 119920 }, { "epoch": 0.9920999296852381, "grad_norm": 1155.7872314453125, "learning_rate": 2.8998184386305196e-08, "loss": 100.5487, "step": 119930 }, { "epoch": 0.9921826529346073, "grad_norm": 1084.304443359375, "learning_rate": 2.8846706116821834e-08, "loss": 90.8323, "step": 119940 }, { "epoch": 0.9922653761839765, "grad_norm": 1023.7007446289062, "learning_rate": 2.8695623379191296e-08, "loss": 71.3728, "step": 119950 }, { "epoch": 0.9923480994333458, "grad_norm": 681.5350952148438, "learning_rate": 2.8544936185434525e-08, "loss": 76.9926, "step": 119960 }, { "epoch": 0.992430822682715, "grad_norm": 1248.9072265625, "learning_rate": 2.8394644547541373e-08, "loss": 89.5176, "step": 119970 }, { "epoch": 0.9925135459320842, "grad_norm": 609.6895751953125, "learning_rate": 2.8244748477470052e-08, "loss": 81.6998, "step": 119980 }, { "epoch": 0.9925962691814535, "grad_norm": 660.6539306640625, "learning_rate": 2.809524798714658e-08, "loss": 99.3655, "step": 119990 }, { "epoch": 0.9926789924308227, "grad_norm": 712.8468017578125, "learning_rate": 2.7946143088466437e-08, "loss": 69.3642, "step": 120000 }, { "epoch": 0.9927617156801919, "grad_norm": 1156.4256591796875, "learning_rate": 2.7797433793292915e-08, "loss": 94.3238, "step": 120010 }, { "epoch": 0.9928444389295612, "grad_norm": 956.7645874023438, "learning_rate": 2.7649120113458217e-08, "loss": 95.6845, "step": 120020 }, { "epoch": 0.9929271621789304, "grad_norm": 726.518310546875, "learning_rate": 2.7501202060763454e-08, "loss": 83.2681, "step": 120030 }, { "epoch": 0.9930098854282996, "grad_norm": 1263.5406494140625, "learning_rate": 2.7353679646976995e-08, "loss": 91.9831, "step": 120040 }, { "epoch": 0.9930926086776689, "grad_norm": 1196.4813232421875, "learning_rate": 2.7206552883836667e-08, "loss": 97.7018, "step": 120050 }, { "epoch": 0.9931753319270381, "grad_norm": 481.29241943359375, "learning_rate": 2.705982178304922e-08, "loss": 94.1881, "step": 120060 }, { "epoch": 0.9932580551764073, "grad_norm": 627.6724853515625, "learning_rate": 2.691348635628921e-08, "loss": 86.9981, "step": 120070 }, { "epoch": 0.9933407784257766, "grad_norm": 792.4179077148438, "learning_rate": 2.676754661519898e-08, "loss": 85.3965, "step": 120080 }, { "epoch": 0.9934235016751458, "grad_norm": 1112.1466064453125, "learning_rate": 2.662200257139147e-08, "loss": 107.8607, "step": 120090 }, { "epoch": 0.993506224924515, "grad_norm": 851.7528076171875, "learning_rate": 2.6476854236446858e-08, "loss": 81.0411, "step": 120100 }, { "epoch": 0.9935889481738843, "grad_norm": 679.0064697265625, "learning_rate": 2.6332101621913133e-08, "loss": 100.3239, "step": 120110 }, { "epoch": 0.9936716714232535, "grad_norm": 970.4327392578125, "learning_rate": 2.6187744739308297e-08, "loss": 93.8976, "step": 120120 }, { "epoch": 0.9937543946726227, "grad_norm": 783.4990234375, "learning_rate": 2.604378360011761e-08, "loss": 81.577, "step": 120130 }, { "epoch": 0.993837117921992, "grad_norm": 464.9545593261719, "learning_rate": 2.5900218215795802e-08, "loss": 64.9535, "step": 120140 }, { "epoch": 0.9939198411713612, "grad_norm": 1092.5909423828125, "learning_rate": 2.57570485977654e-08, "loss": 81.3496, "step": 120150 }, { "epoch": 0.9940025644207304, "grad_norm": 300.0126647949219, "learning_rate": 2.5614274757417846e-08, "loss": 73.0116, "step": 120160 }, { "epoch": 0.9940852876700996, "grad_norm": 1496.8515625, "learning_rate": 2.5471896706113497e-08, "loss": 90.5071, "step": 120170 }, { "epoch": 0.9941680109194689, "grad_norm": 827.8729248046875, "learning_rate": 2.5329914455180516e-08, "loss": 68.6131, "step": 120180 }, { "epoch": 0.9942507341688381, "grad_norm": 1299.62744140625, "learning_rate": 2.5188328015914865e-08, "loss": 96.4743, "step": 120190 }, { "epoch": 0.9943334574182073, "grad_norm": 1085.9984130859375, "learning_rate": 2.5047137399583088e-08, "loss": 90.0169, "step": 120200 }, { "epoch": 0.9944161806675766, "grad_norm": 712.6924438476562, "learning_rate": 2.4906342617418976e-08, "loss": 122.7976, "step": 120210 }, { "epoch": 0.9944989039169458, "grad_norm": 1067.037109375, "learning_rate": 2.4765943680624126e-08, "loss": 105.9706, "step": 120220 }, { "epoch": 0.994581627166315, "grad_norm": 1210.1380615234375, "learning_rate": 2.4625940600369603e-08, "loss": 77.7456, "step": 120230 }, { "epoch": 0.9946643504156844, "grad_norm": 626.2311401367188, "learning_rate": 2.4486333387795935e-08, "loss": 73.6353, "step": 120240 }, { "epoch": 0.9947470736650535, "grad_norm": 1163.8887939453125, "learning_rate": 2.434712205400924e-08, "loss": 72.8398, "step": 120250 }, { "epoch": 0.9948297969144227, "grad_norm": 703.2742919921875, "learning_rate": 2.4208306610087884e-08, "loss": 88.9687, "step": 120260 }, { "epoch": 0.9949125201637921, "grad_norm": 1137.736572265625, "learning_rate": 2.406988706707525e-08, "loss": 121.851, "step": 120270 }, { "epoch": 0.9949952434131613, "grad_norm": 653.0780029296875, "learning_rate": 2.3931863435985303e-08, "loss": 79.4951, "step": 120280 }, { "epoch": 0.9950779666625305, "grad_norm": 1000.9141845703125, "learning_rate": 2.379423572779982e-08, "loss": 72.5086, "step": 120290 }, { "epoch": 0.9951606899118998, "grad_norm": 633.1144409179688, "learning_rate": 2.3657003953468926e-08, "loss": 94.2369, "step": 120300 }, { "epoch": 0.995243413161269, "grad_norm": 827.4677734375, "learning_rate": 2.352016812391278e-08, "loss": 79.3089, "step": 120310 }, { "epoch": 0.9953261364106382, "grad_norm": 759.8338012695312, "learning_rate": 2.3383728250017112e-08, "loss": 57.8192, "step": 120320 }, { "epoch": 0.9954088596600075, "grad_norm": 735.8635864257812, "learning_rate": 2.3247684342639355e-08, "loss": 92.8142, "step": 120330 }, { "epoch": 0.9954915829093767, "grad_norm": 739.5372314453125, "learning_rate": 2.311203641260251e-08, "loss": 82.3384, "step": 120340 }, { "epoch": 0.9955743061587459, "grad_norm": 755.5853881835938, "learning_rate": 2.2976784470700174e-08, "loss": 117.4208, "step": 120350 }, { "epoch": 0.9956570294081152, "grad_norm": 1694.4178466796875, "learning_rate": 2.284192852769429e-08, "loss": 125.9178, "step": 120360 }, { "epoch": 0.9957397526574844, "grad_norm": 635.702880859375, "learning_rate": 2.2707468594313497e-08, "loss": 72.2283, "step": 120370 }, { "epoch": 0.9958224759068536, "grad_norm": 1230.975341796875, "learning_rate": 2.2573404681256463e-08, "loss": 123.6782, "step": 120380 }, { "epoch": 0.9959051991562229, "grad_norm": 678.0013427734375, "learning_rate": 2.243973679919076e-08, "loss": 80.5772, "step": 120390 }, { "epoch": 0.9959879224055921, "grad_norm": 758.3480224609375, "learning_rate": 2.2306464958751217e-08, "loss": 67.9236, "step": 120400 }, { "epoch": 0.9960706456549613, "grad_norm": 788.4033203125, "learning_rate": 2.2173589170541576e-08, "loss": 79.5632, "step": 120410 }, { "epoch": 0.9961533689043306, "grad_norm": 529.8211059570312, "learning_rate": 2.204110944513449e-08, "loss": 113.7574, "step": 120420 }, { "epoch": 0.9962360921536998, "grad_norm": 686.330078125, "learning_rate": 2.1909025793070416e-08, "loss": 67.1674, "step": 120430 }, { "epoch": 0.996318815403069, "grad_norm": 659.1135864257812, "learning_rate": 2.1777338224859278e-08, "loss": 113.0164, "step": 120440 }, { "epoch": 0.9964015386524383, "grad_norm": 969.425048828125, "learning_rate": 2.1646046750978255e-08, "loss": 99.7389, "step": 120450 }, { "epoch": 0.9964842619018075, "grad_norm": 861.7916870117188, "learning_rate": 2.1515151381873435e-08, "loss": 84.4122, "step": 120460 }, { "epoch": 0.9965669851511767, "grad_norm": 389.3373718261719, "learning_rate": 2.1384652127959816e-08, "loss": 83.2005, "step": 120470 }, { "epoch": 0.996649708400546, "grad_norm": 1106.470458984375, "learning_rate": 2.125454899962076e-08, "loss": 96.3944, "step": 120480 }, { "epoch": 0.9967324316499152, "grad_norm": 1094.505859375, "learning_rate": 2.112484200720799e-08, "loss": 83.0741, "step": 120490 }, { "epoch": 0.9968151548992844, "grad_norm": 439.3631896972656, "learning_rate": 2.0995531161041028e-08, "loss": 97.843, "step": 120500 }, { "epoch": 0.9968978781486537, "grad_norm": 987.277099609375, "learning_rate": 2.0866616471409974e-08, "loss": 105.7594, "step": 120510 }, { "epoch": 0.9969806013980229, "grad_norm": 647.2058715820312, "learning_rate": 2.0738097948570514e-08, "loss": 68.505, "step": 120520 }, { "epoch": 0.9970633246473921, "grad_norm": 1555.4075927734375, "learning_rate": 2.0609975602749465e-08, "loss": 75.6344, "step": 120530 }, { "epoch": 0.9971460478967614, "grad_norm": 1339.879638671875, "learning_rate": 2.048224944413979e-08, "loss": 79.3426, "step": 120540 }, { "epoch": 0.9972287711461306, "grad_norm": 446.56878662109375, "learning_rate": 2.035491948290502e-08, "loss": 111.5274, "step": 120550 }, { "epoch": 0.9973114943954998, "grad_norm": 896.1454467773438, "learning_rate": 2.0227985729175393e-08, "loss": 93.6553, "step": 120560 }, { "epoch": 0.9973942176448691, "grad_norm": 909.8489990234375, "learning_rate": 2.0101448193051153e-08, "loss": 81.4754, "step": 120570 }, { "epoch": 0.9974769408942383, "grad_norm": 680.5419921875, "learning_rate": 1.997530688459981e-08, "loss": 64.9978, "step": 120580 }, { "epoch": 0.9975596641436075, "grad_norm": 970.2647705078125, "learning_rate": 1.984956181385833e-08, "loss": 98.7731, "step": 120590 }, { "epoch": 0.9976423873929768, "grad_norm": 1299.3114013671875, "learning_rate": 1.9724212990830938e-08, "loss": 73.416, "step": 120600 }, { "epoch": 0.997725110642346, "grad_norm": 1053.347900390625, "learning_rate": 1.9599260425491873e-08, "loss": 82.9828, "step": 120610 }, { "epoch": 0.9978078338917152, "grad_norm": 1263.4722900390625, "learning_rate": 1.9474704127783184e-08, "loss": 76.9199, "step": 120620 }, { "epoch": 0.9978905571410845, "grad_norm": 490.1828918457031, "learning_rate": 1.9350544107614165e-08, "loss": 77.5371, "step": 120630 }, { "epoch": 0.9979732803904537, "grad_norm": 1117.427734375, "learning_rate": 1.9226780374864695e-08, "loss": 103.023, "step": 120640 }, { "epoch": 0.9980560036398229, "grad_norm": 476.96221923828125, "learning_rate": 1.9103412939381338e-08, "loss": 69.9926, "step": 120650 }, { "epoch": 0.9981387268891923, "grad_norm": 722.0250854492188, "learning_rate": 1.898044181098013e-08, "loss": 101.3328, "step": 120660 }, { "epoch": 0.9982214501385615, "grad_norm": 736.2235107421875, "learning_rate": 1.885786699944492e-08, "loss": 79.6206, "step": 120670 }, { "epoch": 0.9983041733879306, "grad_norm": 1309.5517578125, "learning_rate": 1.873568851452956e-08, "loss": 78.2351, "step": 120680 }, { "epoch": 0.9983868966373, "grad_norm": 752.5504150390625, "learning_rate": 1.8613906365954616e-08, "loss": 87.0025, "step": 120690 }, { "epoch": 0.9984696198866692, "grad_norm": 815.6478881835938, "learning_rate": 1.8492520563409555e-08, "loss": 64.3488, "step": 120700 }, { "epoch": 0.9985523431360384, "grad_norm": 664.5618286132812, "learning_rate": 1.837153111655221e-08, "loss": 70.5869, "step": 120710 }, { "epoch": 0.9986350663854077, "grad_norm": 1548.3511962890625, "learning_rate": 1.825093803500988e-08, "loss": 103.4076, "step": 120720 }, { "epoch": 0.9987177896347769, "grad_norm": 667.9990234375, "learning_rate": 1.813074132837711e-08, "loss": 93.5051, "step": 120730 }, { "epoch": 0.9988005128841461, "grad_norm": 587.8815307617188, "learning_rate": 1.801094100621792e-08, "loss": 84.6205, "step": 120740 }, { "epoch": 0.9988832361335154, "grad_norm": 895.0107421875, "learning_rate": 1.789153707806357e-08, "loss": 76.1649, "step": 120750 }, { "epoch": 0.9989659593828846, "grad_norm": 657.9972534179688, "learning_rate": 1.7772529553414798e-08, "loss": 74.9104, "step": 120760 }, { "epoch": 0.9990486826322538, "grad_norm": 1787.9937744140625, "learning_rate": 1.7653918441740693e-08, "loss": 109.789, "step": 120770 }, { "epoch": 0.9991314058816231, "grad_norm": 562.6156616210938, "learning_rate": 1.753570375247815e-08, "loss": 76.8233, "step": 120780 }, { "epoch": 0.9992141291309923, "grad_norm": 759.29150390625, "learning_rate": 1.7417885495033537e-08, "loss": 86.3301, "step": 120790 }, { "epoch": 0.9992968523803615, "grad_norm": 533.1914672851562, "learning_rate": 1.730046367878102e-08, "loss": 123.4786, "step": 120800 }, { "epoch": 0.9993795756297308, "grad_norm": 1021.0844116210938, "learning_rate": 1.7183438313062573e-08, "loss": 79.5246, "step": 120810 }, { "epoch": 0.9994622988791, "grad_norm": 579.9783935546875, "learning_rate": 1.7066809407190187e-08, "loss": 84.2175, "step": 120820 }, { "epoch": 0.9995450221284692, "grad_norm": 669.8778076171875, "learning_rate": 1.695057697044311e-08, "loss": 100.9156, "step": 120830 }, { "epoch": 0.9996277453778385, "grad_norm": 494.1153259277344, "learning_rate": 1.68347410120695e-08, "loss": 78.8109, "step": 120840 }, { "epoch": 0.9997104686272077, "grad_norm": 1579.7835693359375, "learning_rate": 1.671930154128587e-08, "loss": 84.4901, "step": 120850 }, { "epoch": 0.9997931918765769, "grad_norm": 1268.185546875, "learning_rate": 1.6604258567277652e-08, "loss": 77.1438, "step": 120860 }, { "epoch": 0.9998759151259462, "grad_norm": 618.141845703125, "learning_rate": 1.6489612099197527e-08, "loss": 88.3295, "step": 120870 }, { "epoch": 0.9999586383753154, "grad_norm": 1028.1591796875, "learning_rate": 1.637536214616764e-08, "loss": 69.9013, "step": 120880 }, { "epoch": 1.0, "eval_loss": 76.04621887207031, "eval_runtime": 214.3291, "eval_samples_per_second": 45.579, "eval_steps_per_second": 5.702, "step": 120885 }, { "epoch": 1.0000413616246846, "grad_norm": 835.1196899414062, "learning_rate": 1.6261508717278497e-08, "loss": 103.3933, "step": 120890 }, { "epoch": 1.0001240848740538, "grad_norm": 625.5361938476562, "learning_rate": 1.614805182158896e-08, "loss": 81.46, "step": 120900 }, { "epoch": 1.000206808123423, "grad_norm": 715.9172973632812, "learning_rate": 1.603499146812626e-08, "loss": 85.437, "step": 120910 }, { "epoch": 1.0002895313727924, "grad_norm": 975.20361328125, "learning_rate": 1.5922327665885416e-08, "loss": 100.9134, "step": 120920 }, { "epoch": 1.0003722546221616, "grad_norm": 1666.02734375, "learning_rate": 1.5810060423831487e-08, "loss": 88.4254, "step": 120930 }, { "epoch": 1.0004549778715308, "grad_norm": 772.5983276367188, "learning_rate": 1.5698189750896762e-08, "loss": 97.352, "step": 120940 }, { "epoch": 1.0005377011209, "grad_norm": 1283.052490234375, "learning_rate": 1.5586715655982463e-08, "loss": 107.4238, "step": 120950 }, { "epoch": 1.0006204243702692, "grad_norm": 646.5191650390625, "learning_rate": 1.5475638147957607e-08, "loss": 72.1843, "step": 120960 }, { "epoch": 1.0007031476196384, "grad_norm": 777.9674072265625, "learning_rate": 1.5364957235660115e-08, "loss": 90.0959, "step": 120970 }, { "epoch": 1.0007858708690078, "grad_norm": 364.2453308105469, "learning_rate": 1.525467292789684e-08, "loss": 74.3597, "step": 120980 }, { "epoch": 1.000868594118377, "grad_norm": 626.4849853515625, "learning_rate": 1.5144785233442428e-08, "loss": 70.714, "step": 120990 }, { "epoch": 1.0009513173677462, "grad_norm": 418.33367919921875, "learning_rate": 1.5035294161039882e-08, "loss": 81.4252, "step": 121000 }, { "epoch": 1.0010340406171154, "grad_norm": 1237.5550537109375, "learning_rate": 1.4926199719401124e-08, "loss": 93.9281, "step": 121010 }, { "epoch": 1.0011167638664846, "grad_norm": 523.1364135742188, "learning_rate": 1.4817501917205879e-08, "loss": 67.6458, "step": 121020 }, { "epoch": 1.0011994871158538, "grad_norm": 531.9727172851562, "learning_rate": 1.4709200763103892e-08, "loss": 67.4297, "step": 121030 }, { "epoch": 1.0012822103652232, "grad_norm": 2390.410888671875, "learning_rate": 1.460129626571105e-08, "loss": 97.8159, "step": 121040 }, { "epoch": 1.0013649336145924, "grad_norm": 923.2665405273438, "learning_rate": 1.449378843361271e-08, "loss": 96.3494, "step": 121050 }, { "epoch": 1.0014476568639616, "grad_norm": 484.8533630371094, "learning_rate": 1.4386677275363692e-08, "loss": 62.1874, "step": 121060 }, { "epoch": 1.0015303801133308, "grad_norm": 816.00146484375, "learning_rate": 1.4279962799486069e-08, "loss": 85.6988, "step": 121070 }, { "epoch": 1.0016131033627, "grad_norm": 547.6641235351562, "learning_rate": 1.4173645014470272e-08, "loss": 101.0872, "step": 121080 }, { "epoch": 1.0016958266120692, "grad_norm": 648.3076782226562, "learning_rate": 1.4067723928775645e-08, "loss": 74.9175, "step": 121090 }, { "epoch": 1.0017785498614387, "grad_norm": 828.8029174804688, "learning_rate": 1.3962199550829892e-08, "loss": 83.7772, "step": 121100 }, { "epoch": 1.0018612731108079, "grad_norm": 672.8508911132812, "learning_rate": 1.3857071889029073e-08, "loss": 99.7323, "step": 121110 }, { "epoch": 1.001943996360177, "grad_norm": 847.6339111328125, "learning_rate": 1.3752340951737609e-08, "loss": 83.5709, "step": 121120 }, { "epoch": 1.0020267196095463, "grad_norm": 1562.5223388671875, "learning_rate": 1.3648006747288833e-08, "loss": 85.9273, "step": 121130 }, { "epoch": 1.0021094428589155, "grad_norm": 1429.5631103515625, "learning_rate": 1.354406928398333e-08, "loss": 100.3321, "step": 121140 }, { "epoch": 1.0021921661082847, "grad_norm": 858.3600463867188, "learning_rate": 1.3440528570092259e-08, "loss": 88.2199, "step": 121150 }, { "epoch": 1.002274889357654, "grad_norm": 965.1406860351562, "learning_rate": 1.3337384613852922e-08, "loss": 71.9796, "step": 121160 }, { "epoch": 1.0023576126070233, "grad_norm": 1044.383056640625, "learning_rate": 1.3234637423472085e-08, "loss": 76.3436, "step": 121170 }, { "epoch": 1.0024403358563925, "grad_norm": 696.2551879882812, "learning_rate": 1.3132287007124877e-08, "loss": 70.0724, "step": 121180 }, { "epoch": 1.0025230591057617, "grad_norm": 814.8746948242188, "learning_rate": 1.3030333372954784e-08, "loss": 69.551, "step": 121190 }, { "epoch": 1.0026057823551309, "grad_norm": 665.35009765625, "learning_rate": 1.2928776529074205e-08, "loss": 96.7717, "step": 121200 }, { "epoch": 1.0026885056045, "grad_norm": 1074.0308837890625, "learning_rate": 1.2827616483563343e-08, "loss": 147.5443, "step": 121210 }, { "epoch": 1.0027712288538695, "grad_norm": 541.9178466796875, "learning_rate": 1.2726853244471316e-08, "loss": 80.1292, "step": 121220 }, { "epoch": 1.0028539521032387, "grad_norm": 1141.502685546875, "learning_rate": 1.2626486819814488e-08, "loss": 112.4385, "step": 121230 }, { "epoch": 1.0029366753526079, "grad_norm": 396.3456115722656, "learning_rate": 1.2526517217579248e-08, "loss": 74.7738, "step": 121240 }, { "epoch": 1.003019398601977, "grad_norm": 824.7189331054688, "learning_rate": 1.2426944445719791e-08, "loss": 81.8229, "step": 121250 }, { "epoch": 1.0031021218513463, "grad_norm": 322.1405334472656, "learning_rate": 1.2327768512158667e-08, "loss": 62.011, "step": 121260 }, { "epoch": 1.0031848451007155, "grad_norm": 780.4888916015625, "learning_rate": 1.2228989424786786e-08, "loss": 79.2352, "step": 121270 }, { "epoch": 1.0032675683500847, "grad_norm": 664.0239868164062, "learning_rate": 1.2130607191462863e-08, "loss": 78.1969, "step": 121280 }, { "epoch": 1.003350291599454, "grad_norm": 737.7160034179688, "learning_rate": 1.2032621820015633e-08, "loss": 89.1368, "step": 121290 }, { "epoch": 1.0034330148488233, "grad_norm": 988.8171997070312, "learning_rate": 1.1935033318241084e-08, "loss": 84.0071, "step": 121300 }, { "epoch": 1.0035157380981925, "grad_norm": 708.0661010742188, "learning_rate": 1.1837841693904118e-08, "loss": 72.5567, "step": 121310 }, { "epoch": 1.0035984613475617, "grad_norm": 664.5126342773438, "learning_rate": 1.174104695473688e-08, "loss": 67.6821, "step": 121320 }, { "epoch": 1.003681184596931, "grad_norm": 548.2943725585938, "learning_rate": 1.1644649108441542e-08, "loss": 95.8433, "step": 121330 }, { "epoch": 1.0037639078463, "grad_norm": 988.5643310546875, "learning_rate": 1.1548648162688081e-08, "loss": 87.8513, "step": 121340 }, { "epoch": 1.0038466310956695, "grad_norm": 895.0706176757812, "learning_rate": 1.1453044125114832e-08, "loss": 89.9198, "step": 121350 }, { "epoch": 1.0039293543450387, "grad_norm": 858.029296875, "learning_rate": 1.1357837003329042e-08, "loss": 67.4552, "step": 121360 }, { "epoch": 1.004012077594408, "grad_norm": 605.3368530273438, "learning_rate": 1.1263026804904653e-08, "loss": 54.8068, "step": 121370 }, { "epoch": 1.0040948008437771, "grad_norm": 557.933349609375, "learning_rate": 1.1168613537386186e-08, "loss": 79.3044, "step": 121380 }, { "epoch": 1.0041775240931463, "grad_norm": 797.0153198242188, "learning_rate": 1.1074597208285965e-08, "loss": 85.258, "step": 121390 }, { "epoch": 1.0042602473425155, "grad_norm": 749.7955932617188, "learning_rate": 1.0980977825083561e-08, "loss": 67.815, "step": 121400 }, { "epoch": 1.004342970591885, "grad_norm": 1231.0704345703125, "learning_rate": 1.0887755395228018e-08, "loss": 87.1247, "step": 121410 }, { "epoch": 1.0044256938412541, "grad_norm": 730.1659545898438, "learning_rate": 1.0794929926137287e-08, "loss": 87.4435, "step": 121420 }, { "epoch": 1.0045084170906233, "grad_norm": 1234.9593505859375, "learning_rate": 1.0702501425196576e-08, "loss": 78.5555, "step": 121430 }, { "epoch": 1.0045911403399925, "grad_norm": 728.4467163085938, "learning_rate": 1.0610469899760001e-08, "loss": 91.6618, "step": 121440 }, { "epoch": 1.0046738635893617, "grad_norm": 939.027099609375, "learning_rate": 1.0518835357150036e-08, "loss": 77.4593, "step": 121450 }, { "epoch": 1.004756586838731, "grad_norm": 785.4007568359375, "learning_rate": 1.0427597804657518e-08, "loss": 100.9078, "step": 121460 }, { "epoch": 1.0048393100881003, "grad_norm": 932.05224609375, "learning_rate": 1.0336757249542195e-08, "loss": 77.0113, "step": 121470 }, { "epoch": 1.0049220333374695, "grad_norm": 836.0494995117188, "learning_rate": 1.0246313699031618e-08, "loss": 100.5597, "step": 121480 }, { "epoch": 1.0050047565868387, "grad_norm": 1092.8907470703125, "learning_rate": 1.0156267160322253e-08, "loss": 95.8895, "step": 121490 }, { "epoch": 1.005087479836208, "grad_norm": 1151.7713623046875, "learning_rate": 1.006661764057837e-08, "loss": 119.7996, "step": 121500 }, { "epoch": 1.0051702030855771, "grad_norm": 1046.9149169921875, "learning_rate": 9.977365146932595e-09, "loss": 54.5647, "step": 121510 }, { "epoch": 1.0052529263349463, "grad_norm": 482.7346496582031, "learning_rate": 9.888509686487025e-09, "loss": 66.3425, "step": 121520 }, { "epoch": 1.0053356495843158, "grad_norm": 771.5325927734375, "learning_rate": 9.800051266311006e-09, "loss": 84.2417, "step": 121530 }, { "epoch": 1.005418372833685, "grad_norm": 931.7367553710938, "learning_rate": 9.711989893443353e-09, "loss": 84.3377, "step": 121540 }, { "epoch": 1.0055010960830542, "grad_norm": 843.2858276367188, "learning_rate": 9.624325574890125e-09, "loss": 74.5406, "step": 121550 }, { "epoch": 1.0055838193324234, "grad_norm": 802.0359497070312, "learning_rate": 9.537058317626857e-09, "loss": 72.5852, "step": 121560 }, { "epoch": 1.0056665425817926, "grad_norm": 631.546875, "learning_rate": 9.450188128596328e-09, "loss": 87.635, "step": 121570 }, { "epoch": 1.0057492658311618, "grad_norm": 947.8718872070312, "learning_rate": 9.363715014710784e-09, "loss": 92.5211, "step": 121580 }, { "epoch": 1.0058319890805312, "grad_norm": 304.4026184082031, "learning_rate": 9.277638982850835e-09, "loss": 99.9868, "step": 121590 }, { "epoch": 1.0059147123299004, "grad_norm": 926.6283569335938, "learning_rate": 9.191960039864334e-09, "loss": 78.2546, "step": 121600 }, { "epoch": 1.0059974355792696, "grad_norm": 812.9754028320312, "learning_rate": 9.106678192569718e-09, "loss": 71.0503, "step": 121610 }, { "epoch": 1.0060801588286388, "grad_norm": 1050.507080078125, "learning_rate": 9.021793447750448e-09, "loss": 83.0579, "step": 121620 }, { "epoch": 1.006162882078008, "grad_norm": 738.1742553710938, "learning_rate": 8.937305812162234e-09, "loss": 83.2435, "step": 121630 }, { "epoch": 1.0062456053273772, "grad_norm": 946.2736206054688, "learning_rate": 8.853215292526917e-09, "loss": 77.3213, "step": 121640 }, { "epoch": 1.0063283285767466, "grad_norm": 717.0030517578125, "learning_rate": 8.769521895534705e-09, "loss": 60.9286, "step": 121650 }, { "epoch": 1.0064110518261158, "grad_norm": 722.735107421875, "learning_rate": 8.686225627845268e-09, "loss": 60.4515, "step": 121660 }, { "epoch": 1.006493775075485, "grad_norm": 477.0111083984375, "learning_rate": 8.603326496085529e-09, "loss": 103.6363, "step": 121670 }, { "epoch": 1.0065764983248542, "grad_norm": 554.1845092773438, "learning_rate": 8.520824506851877e-09, "loss": 88.1589, "step": 121680 }, { "epoch": 1.0066592215742234, "grad_norm": 795.9306640625, "learning_rate": 8.438719666707951e-09, "loss": 70.1685, "step": 121690 }, { "epoch": 1.0067419448235926, "grad_norm": 959.230224609375, "learning_rate": 8.357011982187412e-09, "loss": 82.1214, "step": 121700 }, { "epoch": 1.006824668072962, "grad_norm": 684.617919921875, "learning_rate": 8.27570145979062e-09, "loss": 75.703, "step": 121710 }, { "epoch": 1.0069073913223312, "grad_norm": 424.90740966796875, "learning_rate": 8.194788105987395e-09, "loss": 74.4388, "step": 121720 }, { "epoch": 1.0069901145717004, "grad_norm": 777.08251953125, "learning_rate": 8.114271927215923e-09, "loss": 97.2741, "step": 121730 }, { "epoch": 1.0070728378210696, "grad_norm": 970.0604858398438, "learning_rate": 8.034152929881633e-09, "loss": 78.7951, "step": 121740 }, { "epoch": 1.0071555610704388, "grad_norm": 800.564697265625, "learning_rate": 7.954431120359985e-09, "loss": 73.0719, "step": 121750 }, { "epoch": 1.007238284319808, "grad_norm": 970.1133422851562, "learning_rate": 7.875106504994234e-09, "loss": 84.3109, "step": 121760 }, { "epoch": 1.0073210075691774, "grad_norm": 535.0064086914062, "learning_rate": 7.796179090094891e-09, "loss": 114.3683, "step": 121770 }, { "epoch": 1.0074037308185466, "grad_norm": 1022.1173095703125, "learning_rate": 7.71764888194304e-09, "loss": 74.4927, "step": 121780 }, { "epoch": 1.0074864540679158, "grad_norm": 387.936767578125, "learning_rate": 7.63951588678591e-09, "loss": 93.5733, "step": 121790 }, { "epoch": 1.007569177317285, "grad_norm": 637.2479248046875, "learning_rate": 7.561780110840744e-09, "loss": 64.9509, "step": 121800 }, { "epoch": 1.0076519005666542, "grad_norm": 817.9259033203125, "learning_rate": 7.484441560292599e-09, "loss": 107.0969, "step": 121810 }, { "epoch": 1.0077346238160234, "grad_norm": 1081.365234375, "learning_rate": 7.407500241294885e-09, "loss": 68.617, "step": 121820 }, { "epoch": 1.0078173470653928, "grad_norm": 1211.7698974609375, "learning_rate": 7.3309561599693715e-09, "loss": 83.9199, "step": 121830 }, { "epoch": 1.007900070314762, "grad_norm": 497.444091796875, "learning_rate": 7.254809322406742e-09, "loss": 69.8575, "step": 121840 }, { "epoch": 1.0079827935641312, "grad_norm": 490.0628967285156, "learning_rate": 7.1790597346649286e-09, "loss": 70.6277, "step": 121850 }, { "epoch": 1.0080655168135004, "grad_norm": 1028.51953125, "learning_rate": 7.103707402771887e-09, "loss": 88.6616, "step": 121860 }, { "epoch": 1.0081482400628696, "grad_norm": 2735.880126953125, "learning_rate": 7.028752332722266e-09, "loss": 106.7273, "step": 121870 }, { "epoch": 1.0082309633122388, "grad_norm": 659.5089111328125, "learning_rate": 6.954194530480185e-09, "loss": 65.3164, "step": 121880 }, { "epoch": 1.0083136865616082, "grad_norm": 610.6782836914062, "learning_rate": 6.880034001977565e-09, "loss": 66.6304, "step": 121890 }, { "epoch": 1.0083964098109774, "grad_norm": 908.3348999023438, "learning_rate": 6.806270753115796e-09, "loss": 91.2019, "step": 121900 }, { "epoch": 1.0084791330603466, "grad_norm": 1129.9967041015625, "learning_rate": 6.732904789762962e-09, "loss": 87.364, "step": 121910 }, { "epoch": 1.0085618563097158, "grad_norm": 1457.5179443359375, "learning_rate": 6.659936117757171e-09, "loss": 100.0551, "step": 121920 }, { "epoch": 1.008644579559085, "grad_norm": 696.4898681640625, "learning_rate": 6.587364742903779e-09, "loss": 98.5237, "step": 121930 }, { "epoch": 1.0087273028084542, "grad_norm": 702.8273315429688, "learning_rate": 6.515190670977057e-09, "loss": 54.427, "step": 121940 }, { "epoch": 1.0088100260578237, "grad_norm": 1061.605224609375, "learning_rate": 6.4434139077201865e-09, "loss": 81.1102, "step": 121950 }, { "epoch": 1.0088927493071929, "grad_norm": 623.2269287109375, "learning_rate": 6.3720344588430464e-09, "loss": 117.4438, "step": 121960 }, { "epoch": 1.008975472556562, "grad_norm": 1200.9871826171875, "learning_rate": 6.301052330025537e-09, "loss": 97.576, "step": 121970 }, { "epoch": 1.0090581958059313, "grad_norm": 605.901611328125, "learning_rate": 6.230467526915362e-09, "loss": 63.3802, "step": 121980 }, { "epoch": 1.0091409190553005, "grad_norm": 609.5484619140625, "learning_rate": 6.160280055128032e-09, "loss": 70.7963, "step": 121990 }, { "epoch": 1.0092236423046697, "grad_norm": 692.6685180664062, "learning_rate": 6.090489920249076e-09, "loss": 90.4518, "step": 122000 } ], "logging_steps": 10, "max_steps": 123750, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }